1//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This transformation analyzes and transforms the induction variables (and 10// computations derived from them) into forms suitable for efficient execution 13// This pass performs a strength reduction on array references inside loops that 14// have as one or more of their components the loop induction variable, it 15// rewrites expressions to take advantage of scaled-index addressing modes 16// available on the target, and it performs a variety of other optimizations 17// related to loop induction variables. 19// Terminology note: this code has a lot of handling for "post-increment" or 20// "post-inc" users. This is not talking about post-increment addressing modes; 21// it is instead talking about code like this: 23// %i = phi [ 0, %entry ], [ %i.next, %latch ] 26// %c = icmp eq %i.next, %n 28// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however 29// it's useful to think about these as the same register, with some uses using 30// the value of the register before the add and some using it after. In this 31// example, the icmp is a post-increment user, since it uses %i.next, which is 32// the value of the induction variable after the increment. The other common 33// case of post-increment users is users outside the loop. 35// TODO: More sophistication in the way Formulae are generated and filtered. 37// TODO: Handle multiple loops at a time. 39// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead 42// TODO: When truncation is free, truncate ICmp users' operands to make it a 43// smaller encoding (on x86 at least). 45// TODO: When a negated register is used by an add (such as in a list of 46// multiple base registers, or as the increment expression in an addrec), 47// we may not actually need both reg and (-1 * reg) in registers; the 48// negation can be implemented by using a sub instead of an add. The 49// lack of support for taking this into consideration when making 50// register pressure decisions is partly worked around by the "Special" 53//===----------------------------------------------------------------------===// 84#include "llvm/Config/llvm-config.h" 132#define DEBUG_TYPE "loop-reduce" 134/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for 135/// bail out. This threshold is far beyond the number of users that LSR can 136/// conceivably solve, so it should not affect generated code, but catches the 137/// worst cases before LSR burns too much compile time and stack space. 140/// Limit the size of expression that SCEV-based salvaging will attempt to 141/// translate into a DIExpression. 142/// Choose a maximum size such that debuginfo is not excessively increased and 143/// the salvaging is not too expensive for the compiler. 146// Cleanup congruent phis after LSR phi expansion. 149cl::desc(
"Enable LSR phi elimination"));
151// The flag adds instruction count to solutions cost comparison. 154cl::desc(
"Add instruction count to a LSR cost model"));
156// Flag to choose how to narrow complex lsr solution 159cl::desc(
"Narrow LSR complex solution using" 160" expectation of registers number"));
162// Flag to narrow search space by filtering non-optimal formulae with 163// the same ScaledReg and Scale. 166cl::desc(
"Narrow LSR search space by filtering non-optimal formulae" 167" with the same ScaledReg and Scale"));
171cl::desc(
"A flag that overrides the target's preferred addressing mode."),
174"Don't prefer any addressing mode"),
177"Prefer pre-indexed addressing mode"),
180"Prefer post-indexed addressing mode")));
184cl::init(std::numeric_limits<uint16_t>::max()),
185cl::desc(
"LSR search space complexity limit"));
189cl::desc(
"The limit on recursion depth for LSRs setup cost"));
193cl::desc(
"Attempt to drop solution if it is less profitable"));
197cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
201cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
204// Stress test IV chain generation. 207cl::desc(
"Stress test LSR IV chains"));
215 /// Used in situations where the accessed memory type is unknown. 216staticconstunsigned UnknownAddressSpace =
217 std::numeric_limits<unsigned>::max();
220unsigned AddrSpace = UnknownAddressSpace;
222 MemAccessTy() =
default;
223 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
226return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
232unsigned AS = UnknownAddressSpace) {
239/// This class holds data which is used to order reuse candidates. 242 /// This represents the set of LSRUse indices which reference 243 /// a particular register. 250// An offset from an address that is either scalable or fixed. Used for 251// per-target optimizations of addressing modes. 253constexpr Immediate(ScalarTy MinVal,
bool Scalable)
254 : FixedOrScalableQuantity(MinVal, Scalable) {}
256constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
257 : FixedOrScalableQuantity(
V) {}
260constexpr Immediate() =
delete;
262staticconstexpr Immediate getFixed(ScalarTy MinVal) {
263return {MinVal,
false};
265staticconstexpr Immediate getScalable(ScalarTy MinVal) {
268staticconstexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
269return {MinVal, Scalable};
271staticconstexpr Immediate getZero() {
return {0,
false}; }
272staticconstexpr Immediate getFixedMin() {
273return {std::numeric_limits<int64_t>::min(),
false};
275staticconstexpr Immediate getFixedMax() {
276return {std::numeric_limits<int64_t>::max(),
false};
278staticconstexpr Immediate getScalableMin() {
279return {std::numeric_limits<int64_t>::min(),
true};
281staticconstexpr Immediate getScalableMax() {
282return {std::numeric_limits<int64_t>::max(),
true};
285constexprbool isLessThanZero()
const{
return Quantity < 0; }
287constexprbool isGreaterThanZero()
const{
return Quantity > 0; }
289constexprbool isCompatibleImmediate(
const Immediate &Imm)
const{
290returnisZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
293constexprbool isMin()
const{
294return Quantity == std::numeric_limits<ScalarTy>::min();
297constexprbool isMax()
const{
298return Quantity == std::numeric_limits<ScalarTy>::max();
301// Arithmetic 'operators' that cast to unsigned types first. 302constexpr Immediate addUnsigned(
const Immediate &RHS)
const{
303assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
305return {
Value, Scalable ||
RHS.isScalable()};
308constexpr Immediate subUnsigned(
const Immediate &RHS)
const{
309assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
311return {
Value, Scalable ||
RHS.isScalable()};
314// Scale the quantity by a constant without caring about runtime scalability. 315constexpr Immediate mulUnsigned(
const ScalarTy RHS)
const{
317return {
Value, Scalable};
320// Helpers for generating SCEVs with vscale terms where needed. 343// This is needed for the Compare type of std::map when Immediate is used 344// as a key. We don't need it to be fully correct against any value of vscale, 345// just to make sure that vscale-related terms in the map are considered against 346// each other rather than being mixed up and potentially missing opportunities. 347structKeyOrderTargetImmediate {
348bool operator()(
const Immediate &LHS,
const Immediate &RHS)
const{
349if (
LHS.isScalable() && !
RHS.isScalable())
351if (!
LHS.isScalable() &&
RHS.isScalable())
353returnLHS.getKnownMinValue() <
RHS.getKnownMinValue();
357// This would be nicer if we could be generic instead of directly using size_t, 358// but there doesn't seem to be a type trait for is_orderable or 359// is_lessthan_comparable or similar. 360structKeyOrderSizeTAndImmediate {
361bool operator()(
const std::pair<size_t, Immediate> &LHS,
362const std::pair<size_t, Immediate> &RHS)
const{
363size_t LSize =
LHS.first;
364size_t RSize =
RHS.first;
367return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
370}
// end anonymous namespace 372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 374OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
384/// Map register candidates to information about how they are used. 388 RegUsesTy RegUsesMap;
392void countRegister(
constSCEV *Reg,
size_t LUIdx);
393void dropRegister(
constSCEV *Reg,
size_t LUIdx);
394void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
396bool isRegUsedByUsesOtherThan(
constSCEV *Reg,
size_t LUIdx)
const;
411}
// end anonymous namespace 414RegUseTracker::countRegister(
constSCEV *Reg,
size_t LUIdx) {
415 std::pair<RegUsesTy::iterator, bool> Pair =
416 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
421 RSD.UsedByIndices.set(LUIdx);
425RegUseTracker::dropRegister(
constSCEV *Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(Reg);
427assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
429assert(RSD.UsedByIndices.size() > LUIdx);
430 RSD.UsedByIndices.reset(LUIdx);
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435assert(LUIdx <= LastLUIdx);
437// Update RegUses. The data structure is not optimized for this purpose; 438// we must iterate through it and update each of the bit vectors. 439for (
auto &Pair : RegUsesMap) {
441if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
449RegUseTracker::isRegUsedByUsesOtherThan(
constSCEV *Reg,
size_t LUIdx)
const{
450 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
451if (
I == RegUsesMap.end())
455if (i == -1)
returnfalse;
456if ((
size_t)i != LUIdx)
returntrue;
461 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
462assert(
I != RegUsesMap.end() &&
"Unknown register!");
463returnI->second.UsedByIndices;
466void RegUseTracker::clear() {
473/// This class holds information that describes a formula for computing 474/// satisfying a use. It may include broken-out immediates and scaled registers. 476 /// Global base address used for complex addressing. 479 /// Base offset for complex addressing. 480 Immediate BaseOffset = Immediate::getZero();
482 /// Whether any complex addressing has a base register. 483bool HasBaseReg =
false;
485 /// The scale of any complex addressing. 488 /// The list of "base" registers for this use. When this is non-empty. The 489 /// canonical representation of a formula is 490 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and 491 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). 492 /// 3. The reg containing recurrent expr related with currect loop in the 493 /// formula should be put in the ScaledReg. 494 /// #1 enforces that the scaled register is always used when at least two 495 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. 496 /// #2 enforces that 1 * reg is reg. 497 /// #3 ensures invariant regs with respect to current loop can be combined 498 /// together in LSR codegen. 499 /// This invariant can be temporarily broken while building a formula. 500 /// However, every formula inserted into the LSRInstance must be in canonical 504 /// The 'scaled' register for this use. This should be non-null when Scale is 506constSCEV *ScaledReg =
nullptr;
508 /// An additional constant offset which added near the use. This requires a 509 /// temporary register, but the offset itself can live in an add immediate 510 /// field rather than a register. 511 Immediate UnfoldedOffset = Immediate::getZero();
519void canonicalize(
constLoop &L);
523bool hasZeroEnd()
const;
525size_t getNumRegs()
const;
528void deleteBaseReg(
constSCEV *&S);
530bool referencesReg(
constSCEV *S)
const;
531bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
532const RegUseTracker &RegUses)
const;
538}
// end anonymous namespace 540/// Recursion helper for initialMatch. 545// Collect expressions which properly dominate the loop header. 551// Look at add operands. 553for (
constSCEV *S :
Add->operands())
558// Look at addrec operands. 560if (!AR->getStart()->isZero() && AR->isAffine()) {
563 AR->getStepRecurrence(SE),
564// FIXME: AR->getNoWrapFlags() 570// Handle a multiplication by -1 (negation) if it didn't fold. 579constSCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
581for (
constSCEV *S : MyGood)
583for (
constSCEV *S : MyBad)
588// Ok, we can't do anything interesting. Just stuff the whole thing into a 589// register and hope for the best. 593/// Incorporate loop-variant parts of S into this Formula, attempting to keep 594/// all loop-invariant and loop-computable values in a single base register. 602 BaseRegs.push_back(Sum);
608 BaseRegs.push_back(Sum);
616return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
620/// Check whether or not this formula satisfies the canonical 622/// \see Formula::BaseRegs. 623bool Formula::isCanonical(
constLoop &L)
const{
624assert((Scale == 0 || ScaledReg) &&
625"ScaledReg must be non-null if Scale is non-zero");
628return BaseRegs.size() <= 1;
633if (Scale == 1 && BaseRegs.empty())
639// If ScaledReg is not a recurrent expr, or it is but its loop is not current 640// loop, meanwhile BaseRegs contains a recurrent expr reg related with current 641// loop, we want to swap the reg in BaseRegs with ScaledReg. 647/// Helper method to morph a formula into its canonical representation. 648/// \see Formula::BaseRegs. 649/// Every formula having more than one base register, must use the ScaledReg 650/// field. Otherwise, we would have to do special cases everywhere in LSR 651/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... 652/// On the other hand, 1*reg should be canonicalized into reg. 653void Formula::canonicalize(
constLoop &L) {
657if (BaseRegs.empty()) {
658// No base reg? Use scale reg with scale = 1 as such. 659assert(ScaledReg &&
"Expected 1*reg => reg");
660assert(Scale == 1 &&
"Expected 1*reg => reg");
661 BaseRegs.push_back(ScaledReg);
667// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. 669 ScaledReg = BaseRegs.pop_back_val();
673// If ScaledReg is an invariant with respect to L, find the reg from 674// BaseRegs containing the recurrent expr related with Loop L. Swap the 675// reg with ScaledReg. 680if (
I != BaseRegs.end())
686/// Get rid of the scale in the formula. 687/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. 688/// \return true if it was possible to get rid of the scale, false otherwise. 689/// \note After this operation the formula may not be in the canonical form. 690bool Formula::unscale() {
694 BaseRegs.push_back(ScaledReg);
699bool Formula::hasZeroEnd()
const{
700if (UnfoldedOffset || BaseOffset)
702if (BaseRegs.size() != 1 || ScaledReg)
707/// Return the total number of register operands used by this formula. This does 708/// not include register uses implied by non-constant addrec strides. 709size_t Formula::getNumRegs()
const{
710return !!ScaledReg + BaseRegs.size();
713/// Return the type of this formula, if it has one, or null otherwise. This type 714/// is meaningless except for the bit size. 715Type *Formula::getType()
const{
716return !BaseRegs.empty() ? BaseRegs.front()->getType() :
717 ScaledReg ? ScaledReg->getType() :
718 BaseGV ? BaseGV->getType() :
722/// Delete the given base reg from the BaseRegs list. 723void Formula::deleteBaseReg(
constSCEV *&S) {
724if (&S != &BaseRegs.back())
729/// Test if this formula references the given register. 730bool Formula::referencesReg(
constSCEV *S)
const{
734/// Test whether this formula uses registers which are used by uses other than 735/// the use with the given index. 736bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
737const RegUseTracker &RegUses)
const{
739if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
741for (
constSCEV *BaseReg : BaseRegs)
742if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 752 BaseGV->printAsOperand(
OS,
/*PrintType=*/false);
754if (BaseOffset.isNonZero()) {
758for (
constSCEV *BaseReg : BaseRegs) {
760OS <<
"reg(" << *BaseReg <<
')';
762if (HasBaseReg && BaseRegs.empty()) {
764OS <<
"**error: HasBaseReg**";
765 }
elseif (!HasBaseReg && !BaseRegs.empty()) {
767OS <<
"**error: !HasBaseReg**";
771OS << Scale <<
"*reg(";
778if (UnfoldedOffset.isNonZero()) {
780OS <<
"imm(" << UnfoldedOffset <<
')';
789/// Return true if the given addrec can be sign-extended without changing its 797/// Return true if the given add can be sign-extended without changing its 805/// Return true if the given mul can be sign-extended without changing its 814/// Return an expression for LHS /s RHS, if it can be determined and if the 815/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits 816/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that 817/// the multiplication may overflow, which is useful when the result will be 818/// used in a context where the most significant bits are ignored. 821bool IgnoreSignificantBits =
false) {
822// Handle the trivial case, which works for any SCEV type. 826// Handle a few RHS special cases. 830// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do 837// Handle x /s 1 as x. 842// Check for a division of a constant by a constant. 846constAPInt &LA =
C->getAPInt();
853// Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 857 IgnoreSignificantBits);
858if (!Step)
returnnullptr;
860 IgnoreSignificantBits);
861if (!Start)
returnnullptr;
862// FlagNW is independent of the start value, step direction, and is 863// preserved with smaller magnitude steps. 864// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 870// Distribute the sdiv over add operands, if the add doesn't overflow. 874for (
constSCEV *S :
Add->operands()) {
876if (!
Op)
returnnullptr;
884// Check for a multiply operand that we can pull RHS out of. 887// Handle special case C1*X*Y /s C2*X*Y. 892 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
907 IgnoreSignificantBits)) {
918// Otherwise we don't know. 922/// If S involves the addition of a constant integer value, return that integer 923/// value, and mutate S to point to a new SCEV with that value excluded. 926if (
C->getAPInt().getSignificantBits() <= 64) {
928return Immediate::getFixed(
C->getValue()->getSExtValue());
933if (Result.isNonZero())
939if (Result.isNonZero())
941// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 944 }
elseif (
constSCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
946if (
constSCEVConstant *
C = dyn_cast<SCEVConstant>(M->getOperand(0)))
947if (isa<SCEVVScale>(M->getOperand(1))) {
949return Immediate::getScalable(
C->getValue()->getSExtValue());
953return Immediate::getZero();
956/// If S involves the addition of a GlobalValue address, return that symbol, and 957/// mutate S to point to a new SCEV with that value excluded. 959if (
constSCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
960if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
975// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 982/// Returns true if the specified instruction is using the specified value as an 986bool isAddress = isa<LoadInst>(Inst);
987if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
988if (SI->getPointerOperand() == OperandVal)
991// Addressing modes can also be folded into prefetches and a variety 993switch (
II->getIntrinsicID()) {
994case Intrinsic::memset:
995case Intrinsic::prefetch:
996case Intrinsic::masked_load:
997if (
II->getArgOperand(0) == OperandVal)
1000case Intrinsic::masked_store:
1001if (
II->getArgOperand(1) == OperandVal)
1004case Intrinsic::memmove:
1005case Intrinsic::memcpy:
1006if (
II->getArgOperand(0) == OperandVal ||
1007II->getArgOperand(1) == OperandVal)
1013if (IntrInfo.
PtrVal == OperandVal)
1018 }
elseif (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1019if (RMW->getPointerOperand() == OperandVal)
1022if (CmpX->getPointerOperand() == OperandVal)
1028/// Return the type of the memory being accessed. 1031 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1033// First get the type of memory being accessed. 1035 AccessTy.MemTy = Ty;
1037// Then get the pointer address space. 1038if (
constStoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1039 AccessTy.AddrSpace = SI->getPointerAddressSpace();
1040 }
elseif (
constLoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1041 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1042 }
elseif (
constAtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1043 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1045 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1047switch (
II->getIntrinsicID()) {
1048case Intrinsic::prefetch:
1049case Intrinsic::memset:
1050 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1051 AccessTy.MemTy = OperandVal->
getType();
1053case Intrinsic::memmove:
1054case Intrinsic::memcpy:
1056 AccessTy.MemTy = OperandVal->
getType();
1058case Intrinsic::masked_load:
1059 AccessTy.AddrSpace =
1060II->getArgOperand(0)->getType()->getPointerAddressSpace();
1062case Intrinsic::masked_store:
1063 AccessTy.AddrSpace =
1064II->getArgOperand(1)->getType()->getPointerAddressSpace();
1081/// Return true if this AddRec is already a phi in its loop. 1093/// Check if expanding this expression is likely to incur significant cost. This 1094/// is tricky because SCEV doesn't track which expressions are actually computed 1095/// by the current IR. 1097/// We currently allow expansion of IV increments that involve adds, 1098/// multiplication by constants, and AddRecs from existing phis. 1100/// TODO: Allow UDivExpr if we can find an existing IV increment that is an 1101/// obvious multiple of the UDivExpr. 1105// Zero/One operand expressions 1124if (!Processed.
insert(S).second)
1128for (
constSCEV *S :
Add->operands()) {
1137// Multiplication by a constant is ok 1141// If we have the value of one operand, check if an existing 1142// multiplication already generates this expression. 1144Value *UVal = U->getValue();
1146// If U is a constant, it may be used by a ConstantExpr. 1148if (UI && UI->
getOpcode() == Instruction::Mul &&
1162// Fow now, consider any other type of expression (div/mul/min/max) high cost. 1170}
// end anonymous namespace 1172/// Check if the addressing mode defined by \p F is completely 1173/// folded in \p LU at isel time. 1174/// This includes address-mode folding and special icmp tricks. 1175/// This function returns true if \p LU can accommodate what \p F 1176/// defines and up to 1 base + 1 scaled + offset. 1177/// In other words, if \p F has several base registers, this function may 1178/// still return true. Therefore, users still need to account for 1179/// additional base registers and/or unfolded offsets to derive an 1180/// accurate cost model. 1182const LSRUse &LU,
const Formula &
F);
1184// Get the cost of the scaling factor used in F for LU. 1186const LSRUse &LU,
const Formula &
F,
1191/// This class is used to measure and compare candidate formulae. 1193constLoop *
L =
nullptr;
1219// Once any of the metrics loses, they must all remain losers. 1221return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1222 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1223 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1224 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1230returnC.NumRegs == ~0
u;
1233void RateFormula(
const Formula &
F,
1243void RateRegister(
const Formula &
F,
constSCEV *
Reg,
1245void RatePrimaryRegister(
const Formula &
F,
constSCEV *
Reg,
1250/// An operand value in an instruction which is to be replaced with some 1251/// equivalent, possibly strength-reduced, replacement. 1253 /// The instruction which will be updated. 1256 /// The operand of the instruction which will be replaced. The operand may be 1257 /// used more than once; every instance will be replaced. 1258Value *OperandValToReplace =
nullptr;
1260 /// If this user is to use the post-incremented value of an induction 1261 /// variable, this set is non-empty and holds the loops associated with the 1262 /// induction variable. 1265 /// A constant offset to be added to the LSRUse expression. This allows 1266 /// multiple fixups to share the same LSRUse with different offsets, for 1267 /// example in an unrolled loop. 1268 Immediate
Offset = Immediate::getZero();
1270 LSRFixup() =
default;
1272bool isUseFullyOutsideLoop(
constLoop *L)
const;
1278/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted 1279/// SmallVectors of const SCEV*. 1280structUniquifierDenseMapInfo {
1283V.push_back(
reinterpret_cast<constSCEV *
>(-1));
1289V.push_back(
reinterpret_cast<constSCEV *
>(-2));
1303/// This class holds the state that LSR keeps for each use in IVUsers, as well 1304/// as uses invented by LSR itself. It includes information about what kinds of 1305/// things can be folded into the user, information about the user itself, and 1306/// information about how the use may be satisfied. TODO: Represent multiple 1307/// users of the same expression in common? 1312 /// An enum for a kind of use, indicating what types of scaled and immediate 1313 /// operands it might support. 1315Basic,
///< A normal use, with no folding. 1316 Special,
///< A special case of basic, allowing -1 scales. 1317Address,
///< An address use; folding according to TargetLowering 1318 ICmpZero
///< An equality icmp with both operands folded into one. 1319// TODO: Add a generic icmp too? 1325 MemAccessTy AccessTy;
1327 /// The list of operands which are to be replaced. 1330 /// Keep track of the min and max offsets of the fixups. 1331 Immediate MinOffset = Immediate::getFixedMax();
1332 Immediate MaxOffset = Immediate::getFixedMin();
1334 /// This records whether all of the fixups using this LSRUse are outside of 1335 /// the loop, in which case some special-case heuristics may be used. 1336bool AllFixupsOutsideLoop =
true;
1338 /// RigidFormula is set to true to guarantee that this use will be associated 1339 /// with a single formula--the one that initially matched. Some SCEV 1340 /// expressions cannot be expanded. This allows LSR to consider the registers 1341 /// used by those expressions without the need to expand them later after 1342 /// changing the formula. 1343bool RigidFormula =
false;
1345 /// This records the widest use type for any fixup using this 1346 /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max 1347 /// fixup widths to be equivalent, because the narrower one may be relying on 1348 /// the implicit truncation to truncate away bogus bits. 1349Type *WidestFixupType =
nullptr;
1351 /// A list of ways to build a value that can satisfy this user. After the 1352 /// list is populated, one of these is selected heuristically and used to 1353 /// formulate a replacement for OperandValToReplace in UserInst. 1356 /// The set of register candidates used by all formulae in this LSRUse. 1359 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1361 LSRFixup &getNewFixup() {
1362Fixups.push_back(LSRFixup());
1366void pushFixup(LSRFixup &f) {
1368if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1369 MaxOffset =
f.Offset;
1370if (Immediate::isKnownLT(
f.Offset, MinOffset))
1371 MinOffset =
f.Offset;
1374bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1375float getNotSelectedProbability(
constSCEV *Reg)
const;
1376bool InsertFormula(
const Formula &
F,
constLoop &L);
1377void DeleteFormula(Formula &
F);
1378void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1384}
// end anonymous namespace 1387 LSRUse::KindType Kind, MemAccessTy AccessTy,
1389bool HasBaseReg, int64_t Scale,
1393if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1397if (
constauto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1399if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1401if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1403 [&](
unsigned i,
constSCEV *Reg) {
1404 return i + getSetupCost(Reg, Depth - 1);
1406if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1412/// Tally up interesting quantities from the given register. 1413void Cost::RateRegister(
const Formula &
F,
constSCEV *Reg,
1416// If this is an addrec for another loop, it should be an invariant 1417// with respect to L since L is the innermost loop (at least 1418// for now LSR only handles innermost loops). 1419if (AR->getLoop() != L) {
1420// If the AddRec exists, consider it's register free and leave it alone. 1424// It is bad to allow LSR for current loop to add induction variables 1425// for its sibling loops. 1426if (!AR->getLoop()->contains(L)) {
1431// Otherwise, it will be an invariant with respect to Loop L. 1436unsigned LoopCost = 1;
1440// If the step size matches the base offset, we could use pre-indexed 1443if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1444if (Step->getAPInt() ==
F.BaseOffset.getFixedValue())
1447constSCEV *LoopStep = AR->getStepRecurrence(*SE);
1448if (isa<SCEVConstant>(LoopStep)) {
1449constSCEV *LoopStart = AR->getStart();
1450if (!isa<SCEVConstant>(LoopStart) &&
1456C.AddRecCost += LoopCost;
1458// Add the step value register, if it needs one. 1459// TODO: The non-affine case isn't precisely modeled here. 1460if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1461if (!Regs.
count(AR->getOperand(1))) {
1462 RateRegister(
F, AR->getOperand(1), Regs);
1470// Rough heuristic; favor registers which don't require extra setup 1471// instructions in the preheader. 1473// Ensure we don't, even with the recusion limit, produce invalid costs. 1474C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1476C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1480/// Record this register in the set. If we haven't seen it before, rate 1481/// it. Optional LoserRegs provides a way to declare any formula that refers to 1482/// one of those regs an instant loser. 1483void Cost::RatePrimaryRegister(
const Formula &
F,
constSCEV *Reg,
1486if (LoserRegs && LoserRegs->
count(Reg)) {
1490if (Regs.
insert(Reg).second) {
1491 RateRegister(
F, Reg, Regs);
1492if (LoserRegs && isLoser())
1497void Cost::RateFormula(
const Formula &
F,
1504assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1505// Tally up the registers. 1506unsigned PrevAddRecCost =
C.AddRecCost;
1507unsigned PrevNumRegs =
C.NumRegs;
1508unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1509if (
constSCEV *ScaledReg =
F.ScaledReg) {
1510if (VisitedRegs.
count(ScaledReg)) {
1514 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1518for (
constSCEV *BaseReg :
F.BaseRegs) {
1519if (VisitedRegs.
count(BaseReg)) {
1523 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1528// Determine how many (unfolded) adds we'll need inside the loop. 1529size_t NumBaseParts =
F.getNumRegs();
1530if (NumBaseParts > 1)
1531// Do not count the base and a possible second register if the target 1532// allows to fold 2 registers. 1535C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1537// Accumulate non-free scaling amounts. 1540// Tally up the non-zero immediates. 1541for (
const LSRFixup &
Fixup : LU.Fixups) {
1542if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1543 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1545C.ImmCost += 64;
// Handle symbolic values conservatively. 1546// TODO: This should probably be the pointer size. 1547elseif (
Offset.isNonZero())
1551// Check with target if this offset with this instruction is 1552// specifically not supported. 1553if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1558// Incompatible immediate type, increase cost to avoid using 1563// If we don't count instruction cost exit here. 1569// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as 1570// additional instruction (at least fill). 1571// TODO: Need distinguish register class? 1574if (
C.NumRegs > TTIRegNum) {
1575// Cost already exceeded TTIRegNum, then only newly added register can add 1577if (PrevNumRegs > TTIRegNum)
1578C.Insns += (
C.NumRegs - PrevNumRegs);
1580C.Insns += (
C.NumRegs - TTIRegNum);
1583// If ICmpZero formula ends with not 0, it could not be replaced by 1584// just add or sub. We'll need to compare final result of AddRec. 1585// That means we'll need an additional instruction. But if the target can 1586// macro-fuse a compare with a branch, don't count this extra instruction. 1587// For -10 + {0, +, 1}: 1593if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1596// Each new AddRec adds 1 instruction to calculation. 1597C.Insns += (
C.AddRecCost - PrevAddRecCost);
1599// BaseAdds adds instructions for unfolded registers. 1600if (LU.Kind != LSRUse::ICmpZero)
1601C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1605/// Set this cost to a losing value. 1607C.Insns = std::numeric_limits<unsigned>::max();
1608C.NumRegs = std::numeric_limits<unsigned>::max();
1609C.AddRecCost = std::numeric_limits<unsigned>::max();
1610C.NumIVMuls = std::numeric_limits<unsigned>::max();
1611C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1612C.ImmCost = std::numeric_limits<unsigned>::max();
1613C.SetupCost = std::numeric_limits<unsigned>::max();
1614C.ScaleCost = std::numeric_limits<unsigned>::max();
1617/// Choose the lower cost. 1618bool Cost::isLess(
constCost &
Other)
const{
1621returnC.Insns <
Other.C.Insns;
1625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1628OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1629OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1630if (
C.AddRecCost != 0)
1631OS <<
", with addrec cost " <<
C.AddRecCost;
1632if (
C.NumIVMuls != 0)
1633OS <<
", plus " <<
C.NumIVMuls <<
" IV mul" 1634 << (
C.NumIVMuls == 1 ?
"" :
"s");
1635if (
C.NumBaseAdds != 0)
1636OS <<
", plus " <<
C.NumBaseAdds <<
" base add" 1637 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1638if (
C.ScaleCost != 0)
1639OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1641OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1642if (
C.SetupCost != 0)
1643OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1651/// Test whether this fixup always uses its value outside of the given loop. 1652bool LSRFixup::isUseFullyOutsideLoop(
constLoop *L)
const{
1653// PHI nodes use their value in their incoming blocks. 1654if (
constPHINode *PN = dyn_cast<PHINode>(UserInst)) {
1655for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1656if (PN->getIncomingValue(i) == OperandValToReplace &&
1657L->contains(PN->getIncomingBlock(i)))
1662return !
L->contains(UserInst);
1665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1668// Store is common and interesting enough to be worth special-casing. 1669if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1671Store->getOperand(0)->printAsOperand(
OS,
/*PrintType=*/false);
1672 }
elseif (UserInst->getType()->isVoidTy())
1673OS << UserInst->getOpcodeName();
1675 UserInst->printAsOperand(
OS,
/*PrintType=*/false);
1677OS <<
", OperandValToReplace=";
1678 OperandValToReplace->printAsOperand(
OS,
/*PrintType=*/false);
1680for (
constLoop *PIL : PostIncLoops) {
1681OS <<
", PostIncLoop=";
1682 PIL->getHeader()->printAsOperand(
OS,
/*PrintType=*/false);
1694/// Test whether this use as a formula which has the same registers as the given 1696bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const{
1698if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1699// Unstable sort by host order ok, because this is only used for uniquifying. 1701return Uniquifier.
count(Key);
1704/// The function returns a probability of selecting formula without Reg. 1705float LSRUse::getNotSelectedProbability(
constSCEV *Reg)
const{
1707for (
const Formula &
F : Formulae)
1708if (
F.referencesReg(Reg))
1710return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1713/// If the given formula has not yet been inserted, add it to the list, and 1714/// return true. Return false otherwise. The formula must be in canonical form. 1715bool LSRUse::InsertFormula(
const Formula &
F,
constLoop &L) {
1716assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1718if (!Formulae.empty() && RigidFormula)
1722if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1723// Unstable sort by host order ok, because this is only used for uniquifying. 1726if (!Uniquifier.
insert(Key).second)
1729// Using a register to hold the value of 0 is not profitable. 1730assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1731"Zero allocated in a scaled register!");
1733for (
constSCEV *BaseReg :
F.BaseRegs)
1734assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1737// Add the formula to the list. 1738 Formulae.push_back(
F);
1740// Record registers now being used by this use. 1741 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1748/// Remove the given formula from this use's list. 1749void LSRUse::DeleteFormula(Formula &
F) {
1750if (&
F != &Formulae.back())
1752 Formulae.pop_back();
1755/// Recompute the Regs field, and update RegUses. 1756void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1757// Now that we've filtered out some formulae, recompute the Regs set. 1760for (
const Formula &
F : Formulae) {
1761if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1762 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1765// Update the RegTracker. 1766for (
constSCEV *S : OldRegs)
1768 RegUses.dropRegister(S, LUIdx);
1771#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1773OS <<
"LSR Use: Kind=";
1776case Special:
OS <<
"Special";
break;
1777case ICmpZero:
OS <<
"ICmpZero";
break;
1780if (AccessTy.MemTy->isPointerTy())
1781OS <<
"pointer";
// the full pointer type could be really verbose 1783OS << *AccessTy.MemTy;
1786OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1790bool NeedComma =
false;
1791for (
const LSRFixup &
Fixup : Fixups) {
1792if (NeedComma)
OS <<
',';
1798if (AllFixupsOutsideLoop)
1799OS <<
", all-fixups-outside-loop";
1802OS <<
", widest fixup type: " << *WidestFixupType;
1811 LSRUse::KindType Kind, MemAccessTy AccessTy,
1813bool HasBaseReg, int64_t Scale,
1816case LSRUse::Address: {
1817 int64_t FixedOffset =
1818 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1819 int64_t ScalableOffset =
1820 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1822 HasBaseReg, Scale, AccessTy.AddrSpace,
1823Fixup, ScalableOffset);
1825case LSRUse::ICmpZero:
1826// There's not even a target hook for querying whether it would be legal to 1827// fold a GV into an ICmp. 1831// ICmp only has two operands; don't allow more than two non-trivial parts. 1832if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1835// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by 1836// putting the scaled register in the other operand of the icmp. 1837if (Scale != 0 && Scale != -1)
1840// If we have low-level target information, ask the target if it can fold an 1841// integer immediate on an icmp. 1842if (BaseOffset.isNonZero()) {
1843// We don't have an interface to query whether the target supports 1844// icmpzero against scalable quantities yet. 1845if (BaseOffset.isScalable())
1849// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset 1850// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset 1851// Offs is the ICmp immediate. 1853// The cast does the right thing with 1854// std::numeric_limits<int64_t>::min(). 1855 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1859// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg 1863// Only handle single-register values. 1864return !BaseGV && Scale == 0 && BaseOffset.isZero();
1866case LSRUse::Special:
1867// Special case Basic to handle -1 scales. 1868return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1875 Immediate MinOffset, Immediate MaxOffset,
1876 LSRUse::KindType Kind, MemAccessTy AccessTy,
1878bool HasBaseReg, int64_t Scale) {
1879if (BaseOffset.isNonZero() &&
1880 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1881 BaseOffset.isScalable() != MaxOffset.isScalable()))
1883// Check for overflow. 1884 int64_t
Base = BaseOffset.getKnownMinValue();
1885 int64_t Min = MinOffset.getKnownMinValue();
1886 int64_t Max = MaxOffset.getKnownMinValue();
1889 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1892 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1895 HasBaseReg, Scale) &&
1901 Immediate MinOffset, Immediate MaxOffset,
1902 LSRUse::KindType Kind, MemAccessTy AccessTy,
1903const Formula &
F,
constLoop &L) {
1904// For the purpose of isAMCompletelyFolded either having a canonical formula 1905// or a scale not equal to zero is correct. 1906// Problems may arise from non canonical formulae having a scale == 0. 1907// Strictly speaking it would best to just rely on canonical formulae. 1908// However, when we generate the scaled formulae, we first check that the 1909// scaling factor is profitable before computing the actual ScaledReg for 1910// compile time sake. 1911assert((
F.isCanonical(L) ||
F.Scale != 0));
1913F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1916/// Test whether we know how to expand the current formula. 1918 Immediate MaxOffset, LSRUse::KindType Kind,
1920 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1921// We know how to expand completely foldable formulae. 1923 BaseOffset, HasBaseReg, Scale) ||
1924// Or formulae that use a base register produced by a sum of base 1928 BaseGV, BaseOffset,
true, 0));
1932 Immediate MaxOffset, LSRUse::KindType Kind,
1933 MemAccessTy AccessTy,
const Formula &
F) {
1934returnisLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1935F.BaseOffset,
F.HasBaseReg,
F.Scale);
1947const LSRUse &LU,
const Formula &
F) {
1948// Target may want to look at the user instructions. 1950for (
const LSRFixup &
Fixup : LU.Fixups)
1952 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1959 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1964const LSRUse &LU,
const Formula &
F,
1969// If the use is not completely folded in that instruction, we will have to 1970// pay an extra cost only for scale != 1. 1976case LSRUse::Address: {
1977// Check the scaling factor cost with both the min and max offsets. 1978 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1979if (
F.BaseOffset.isScalable()) {
1980 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1981 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1983 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1984 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1988F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1991F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1994"Legal addressing mode has an illegal cost!");
1995return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1997case LSRUse::ICmpZero:
1999case LSRUse::Special:
2000// The use is completely folded, i.e., everything is folded into the 2009 LSRUse::KindType Kind, MemAccessTy AccessTy,
2012// Fast-path: zero is always foldable. 2013if (BaseOffset.isZero() && !BaseGV)
2016// Conservatively, create an address with an immediate and a 2018 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2020// Canonicalize a scale of 1 to a base register if the formula doesn't 2021// already have a base register. 2022if (!HasBaseReg && Scale == 1) {
2027// FIXME: Try with + without a scale? Maybe based on TTI? 2028// I think basereg + scaledreg + immediateoffset isn't a good 'conservative' 2029// default for many architectures, not just AArch64 SVE. More investigation 2030// needed later to determine if this should be used more widely than just 2031// on scalable types. 2032if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2042 Immediate MaxOffset, LSRUse::KindType Kind,
2043 MemAccessTy AccessTy,
constSCEV *S,
2045// Fast-path: zero is always foldable. 2046if (S->
isZero())
returntrue;
2048// Conservatively, create an address with an immediate and a 2053// If there's anything else involved, it's not foldable. 2054if (!S->
isZero())
returnfalse;
2056// Fast-path: zero is always foldable. 2057if (BaseOffset.isZero() && !BaseGV)
2060if (BaseOffset.isScalable())
2063// Conservatively, create an address with an immediate and a 2065 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2068 BaseOffset, HasBaseReg, Scale);
2073/// An individual increment in a Chain of IV increments. Relate an IV user to 2074/// an expression that computes the IV it uses from the IV used by the previous 2075/// link in the Chain. 2077/// For the head of a chain, IncExpr holds the absolute SCEV expression for the 2078/// original IVOperand. The head of the chain's IVOperand is only valid during 2079/// chain collection, before LSR replaces IV users. During chain generation, 2080/// IncExpr can be used to find the new IVOperand that computes the same 2088 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
2091// The list of IV increments in program order. We typically add the head of a 2092// chain without finding subsequent links. 2095constSCEV *ExprBase =
nullptr;
2098 IVChain(
const IVInc &Head,
constSCEV *
Base)
2099 : Incs(1, Head), ExprBase(
Base) {}
2103// Return the first increment in the chain. 2106return std::next(Incs.
begin());
2112// Returns true if this chain contains any increments. 2113bool hasIncs()
const{
return Incs.
size() >= 2; }
2115// Add an IVInc to the end of this chain. 2118// Returns the last UserInst in the chain. 2121// Returns true if IncExpr can be profitably added to this chain. 2122bool isProfitableIncrement(
constSCEV *OperExpr,
2127/// Helper for CollectChains to track multiple IV increment uses. Distinguish 2128/// between FarUsers that definitely cross IV increments and NearUsers that may 2129/// be used between IV increments. 2135/// This class holds state for the main loop strength reduction logic. 2150 /// This is the insert position that the current loop's induction variable 2151 /// increment should be placed. In simple loops, this is the latch block's 2152 /// terminator. But in more complicated cases, this is a position which will 2153 /// dominate all the in-loop post-increment users. 2156 /// Interesting factors between use strides. 2158 /// We explicitly use a SetVector which contains a SmallSet, instead of the 2159 /// default, a SmallDenseSet, because we need to use the full range of 2160 /// int64_ts, and there's currently no good way of doing that with 2164 /// The cost of the current SCEV, the best solution by LSR will be dropped if 2165 /// the solution is not profitable. 2168 /// Interesting use types, to facilitate truncation reuse. 2171 /// The list of interesting uses. 2174 /// Track which uses use which register candidates. 2175 RegUseTracker RegUses;
2177// Limit the number of chains to avoid quadratic behavior. We don't expect to 2178// have more than a few IV increment chains in a loop. Missing a Chain falls 2179// back to normal LSR behavior for those uses. 2180staticconstunsigned MaxChains = 8;
2182 /// IV users can form a chain of IV increments. 2185 /// IV users that belong to profitable IVChains. 2188 /// Induction variables that were generated and inserted by the SCEV Expander. 2191// Inserting instructions in the loop and using them as PHI's input could 2192// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the 2193// corresponding incoming block is not loop exiting). So collect all such 2194// instructions to form LCSSA for them later. 2197void OptimizeShadowIV();
2200void OptimizeLoopTermCond();
2204void FinalizeChain(IVChain &Chain);
2205void CollectChains();
2206void GenerateIVChain(
const IVChain &Chain,
2209void CollectInterestingTypesAndFactors();
2210void CollectFixupsAndInitialFormulae();
2212// Support for sharing of LSRUses between LSRFixups. 2216bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2217 LSRUse::KindType Kind, MemAccessTy AccessTy);
2219 std::pair<size_t, Immediate> getUse(
constSCEV *&Expr, LSRUse::KindType Kind,
2220 MemAccessTy AccessTy);
2222void DeleteUse(LSRUse &LU,
size_t LUIdx);
2224 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2226void InsertInitialFormula(
constSCEV *S, LSRUse &LU,
size_t LUIdx);
2227void InsertSupplementalFormula(
constSCEV *S, LSRUse &LU,
size_t LUIdx);
2228void CountRegisters(
const Formula &
F,
size_t LUIdx);
2229bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2231void CollectLoopInvariantFixupsAndFormulae();
2233void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2236void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2238size_tIdx,
bool IsScaledReg =
false);
2239void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2240void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2242bool IsScaledReg =
false);
2243void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2244void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2247size_tIdx,
bool IsScaledReg =
false);
2248void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2250void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2251void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2252void GenerateCrossUseConstantOffsets();
2253void GenerateAllReuseFormulae();
2255void FilterOutUndesirableDedicatedRegisters();
2257size_t EstimateSearchSpaceComplexity()
const;
2258void NarrowSearchSpaceByDetectingSupersets();
2259void NarrowSearchSpaceByCollapsingUnrolledCode();
2260void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2261void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2262void NarrowSearchSpaceByFilterPostInc();
2263void NarrowSearchSpaceByDeletingCostlyFormulas();
2264void NarrowSearchSpaceByPickingWinnerRegs();
2265void NarrowSearchSpaceUsingHeuristics();
2280const LSRUse &LU)
const;
2282Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2285void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2288void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2297bool getChanged()
const{
return Changed; }
2299return ScalarEvolutionIVs;
2309}
// end anonymous namespace 2311/// If IV is used in a int-to-float cast inside the loop then try to eliminate 2312/// the cast operation. 2313void LSRInstance::OptimizeShadowIV() {
2315if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2319 UI != E;
/* empty */) {
2323Type *DestTy =
nullptr;
2324bool IsSigned =
false;
2326/* If shadow use is a int->float cast then insert a second IV 2327 to eliminate this cast. 2329 for (unsigned i = 0; i < n; ++i) 2335 for (unsigned i = 0; i < n; ++i, ++d) 2338if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2340 DestTy = UCast->getDestTy();
2342elseif (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2344 DestTy = SCast->getDestTy();
2346if (!DestTy)
continue;
2348// If target does not support DestTy natively then do not apply 2349// this transformation. 2356// If the calculation in integers overflows, the result in FP type will 2357// differ. So we only can do this transformation if we are guaranteed to not 2358// deal with overflowing values 2366if (Mantissa == -1)
continue;
2370unsignedEntry, Latch;
2381Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2382 (
double)
Init->getSExtValue() :
2383 (
double)
Init->getZExtValue());
2388if (Incr->
getOpcode() != Instruction::Add
2389 && Incr->
getOpcode() != Instruction::Sub)
2392/* Initialize new IV, double d = 0.0 in above example. */ 2403// Ignore negative constants, as the code below doesn't handle them 2404// correctly. TODO: Remove this restriction. 2405if (!
C->getValue().isStrictlyPositive())
2408/* Add new PHINode. */ 2412/* create new increment. '++d' in above example. */ 2413Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2415 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2416 : Instruction::FSub,
2423/* Remove cast operation */ 2431/// If Cond has an operand that is an expression of an IV, set the IV user and 2432/// stride information and return true, otherwise return false. 2435if (
U.getUser() ==
Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.

/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     ...
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       ...
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     ...
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  // Check for a max calculation that matches the pattern. There's no check
  // for ICMP_ULE here because the comparison would be with zero, which
  // isn't interesting.
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  const SCEVNAryExpr *Max = nullptr;
  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
    Max = S;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
    Max = S;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;
    Max = U;
  } else {
    // No match; bail.
    return Cond;
  }

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

  // ScalarEvolution canonicalizes constants to the left. For < and >, look
  // for a comparison with 1. For <= and >=, a comparison with zero.
  if (!MaxLHS ||
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
    return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
    // Look for n+1, and grab n.
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();
  else
    // Max doesn't match expected pattern.
    return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
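// Illustrative result (not from the original source): for the loop sketched in
// the comment above, the latch test
//   } while (++i != max);
// becomes
//   } while (++i < n);   // the new "scmp" comparison
// and the now-dead "max = n < 1 ? 1 : n" computation is erased.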
/// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
  // We need a different set of heuristics for rotated and non-rotated loops.
  // If a loop is rotated then the latch is also the backedge, so inserting
  // post-inc expressions just before the latch is ideal. To reduce live ranges
  // it also makes sense to rewrite terminating conditions to use post-inc
  // expressions.
  //
  // If the loop is not rotated then the latch is not a backedge; the latch
  // check is done in the loop head. Adding post-inc expressions before the
  // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
  // in the loop body. In this case we do *not* want to use post-inc expressions
  // in the latch check, and we want to insert post-inc expressions before
  // the backedge.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
2624// The backedge doesn't exit the loop; treat this as a head-tested loop. 2629// Otherwise treat this as a rotated loop. 2630for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2631// Get the terminating condition for the loop if possible. If we 2632// can, we want to change it to use a post-incremented version of its 2633// induction variable, to allow coalescing the live ranges for the IV into 2634// one register value. 2636BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2639// FIXME: Overly conservative, termination condition could be an 'or' etc.. 2643// Search IVUsesByStride to find Cond's IVUse if there is one. 2646if (!FindIVUserForCond(
Cond, CondUse))
2649// If the trip count is computed in terms of a max (due to ScalarEvolution 2650// being unable to find a sufficient guard, for example), change the loop 2651// comparison to use SLT or ULT instead of NE. 2652// One consequence of doing this now is that it disrupts the count-down 2653// optimization. That's not always a bad thing though, because in such 2654// cases it may still be worthwhile to avoid a max. 2657// If this exiting block dominates the latch block, it may also use 2658// the post-inc value if it won't be shared with other uses. 2659// Check for dominance. 2660if (!DT.
dominates(ExitingBlock, LatchBlock))
2663// Conservatively avoid trying to use the post-inc value in non-latch 2664// exits if there may be pre-inc users in intervening blocks. 2665if (LatchBlock != ExitingBlock)
2667// Test if the use is reachable from the exiting block. This dominator 2668// query is a conservative approximation of reachability. 2669if (&UI != CondUse &&
2671// Conservatively assume there may be reuse if the quotient of their 2672// strides could be a legal scale. 2673constSCEV *
A = IU.getStride(*CondUse, L);
2674constSCEV *
B = IU.getStride(UI, L);
2675if (!
A || !
B)
continue;
2687// Stride of one or negative one can have reuse with non-addresses. 2688if (
C->isOne() ||
C->isMinusOne())
2689goto decline_post_inc;
2690// Avoid weird situations. 2691if (
C->getValue().getSignificantBits() >= 64 ||
2692C->getValue().isMinSignedValue())
2693goto decline_post_inc;
2694// Check for possible scaled-address reuse. 2696 MemAccessTy AccessTy =
2698 int64_t Scale =
C->getSExtValue();
2701/*HasBaseReg=*/true, Scale,
2702 AccessTy.AddrSpace))
2703goto decline_post_inc;
2707/*HasBaseReg=*/true, Scale,
2708 AccessTy.AddrSpace))
2709goto decline_post_inc;
2714LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: " 2717// It's possible for the setcc instruction to be anywhere in the loop, and 2718// possible for it to have multiple users. If it is not immediately before 2719// the exiting block branch, move it. 2720if (
Cond->getNextNonDebugInstruction() != TermBr) {
2721if (
Cond->hasOneUse()) {
2724// Clone the terminating condition and insert into the loopend. 2726Cond = cast<ICmpInst>(
Cond->clone());
2727Cond->setName(
L->getHeader()->getName() +
".termcond");
2730// Clone the IVUse, as the old use still exists! 2736// If we get to here, we know that we can transform the setcc instruction to 2737// use the post-incremented version of the IV, allowing us to coalesce the 2738// live ranges for the IV correctly. 2746// Determine an insertion point for the loop induction variable increment. It 2747// must dominate all the post-inc comparisons we just set up, and it must 2748// dominate the loop latch edge. 2749 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
/// Determine if the given use can accommodate a fixup at the given offset and
/// other details. If so, update the use and return true.
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
  // something conservative, however this can pessimize in the case that one of
  // the uses will have all its uses outside the loop, for example.
  if (LU.Kind != Kind)
    return false;

  // Check for a mismatched access type, and fall back conservatively as needed.
  // TODO: Be less conservative when the type is similar and can use the same
  // addressing modes.
  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);
    }
  }

  // Conservatively assume HasBaseReg is true for now.
  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          LU.MaxOffset - NewOffset, HasBaseReg))
      return false;
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          NewOffset - LU.MinOffset, HasBaseReg))
      return false;
    NewMaxOffset = NewOffset;
  }

  // FIXME: We should be able to handle some level of scalable offset support
  // for 'void', but in order to get basic support up and running this is
  // being left out for now.
  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
    return false;

  // Update the use.
  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
  return true;
}
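// Illustrative example (assumed target behaviour): if an LSRUse currently
// covers address fixups at offsets [0, 8] and a new fixup needs offset 16, the
// use is only widened to cover [0, 16] when the target can still fold the
// whole range into its addressing modes; otherwise reconcileNewOffset returns
// false and the caller creates a separate LSRUse.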
/// Return an LSRUse index and an offset value for a fixup which needs the given
/// expression, with the given kind and optional access type. Either reuse an
/// existing use or create a new one, as needed.
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;
  Immediate Offset = ExtractImmediate(Expr, SE);

  // Basic uses can't accept any offset, for example.
  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
                        Offset, /*HasBaseReg=*/ true)) {
    Expr = Copy;
    Offset = Immediate::getFixed(0);
  }

  std::pair<UseMapTy::iterator, bool> P =
    UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
  if (!P.second) {
    // A use already existed with this base.
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
      // Reuse this use.
      return std::make_pair(LUIdx, Offset);
  }

  // Create a new use.
  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  LU.MinOffset = Offset;
  LU.MaxOffset = Offset;
  return std::make_pair(LUIdx, Offset);
}
/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
    std::swap(LU, Uses.back());
  Uses.pop_back();

  // Update RegUses.
  RegUses.swapAndDropUse(LUIdx, Uses.size());
}
/// Look for a use distinct from OrigLU which has a formula that has the same
/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  // Search all uses for the formula. This could be more clever.
  for (LSRUse &LU : Uses) {
    // Check whether this use is close enough to OrigLU, to see whether it's
    // worthwhile looking through its formulae.
    // Ignore ICmpZero uses because they may contain formulae generated by
    // GenerateICmpZeroScales, in which case adding fixup offsets may
    // be invalid.
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      // Scan through this use's formulae.
      for (const Formula &F : LU.Formulae) {
        // Check to see if this formula has the same registers and symbols
        // as OrigF.
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
            return &LU;
          // This is the formula where all the registers and symbols matched;
          // there aren't going to be any others. Since we declined it, we
          // can skip the rest of the formulae and proceed to the next LSRUse.
          break;
        }
      }
    }
  }

  // Nothing looked good.
  return nullptr;
}

void LSRInstance::CollectInterestingTypesAndFactors() {
2898// Collect interesting types and strides. 2901constSCEV *Expr = IU.getExpr(U);
2905// Collect interesting types. 2908// Add strides for mentioned loops. 2919 }
while (!Worklist.
empty());
2922// Compute interesting factors from the set of interesting strides. 2924I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2926 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2927constSCEV *OldStride = *
I;
2928constSCEV *NewStride = *NewStrideIter;
2939 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2941if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2942 Factors.insert(Factor->getAPInt().getSExtValue());
2947if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2948 Factors.insert(Factor->getAPInt().getSExtValue());
2952// If all uses use the same type, don't bother looking for truncation-based 2954if (
Types.size() == 1)
/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
/// IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
              Loop *L, ScalarEvolution &SE) {
  for(; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
      if (!SE.isSCEVable(Oper->getType()))
        continue;

      if (const SCEVAddRecExpr *AR =
            dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
        if (AR->getLoop() == L)
          break;
      }
    }
  }
  return OI;
}

/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);
  return Oper;
}
/// Return an approximation of this SCEV expression's "base", or NULL for any
/// constant. Returning the expression itself is conservative. Returning a
/// deeper subexpression is more precise and valid as long as it isn't less
/// complex than another subexpression. For expressions involving multiple
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) {
  switch (S->getSCEVType()) {
  default: // including scUnknown.
    return S;
  case scConstant:
    return nullptr;
  case scTruncate:
    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
  case scZeroExtend:
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
  case scSignExtend:
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
  case scAddExpr: {
    // Skip over scaled operands (scMulExpr) to follow add operands as long as
    // there's nothing more complex.
    // FIXME: not sure if we want to recognize negation.
    const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
    for (const SCEV *SubExpr : reverse(Add->operands())) {
      if (SubExpr->getSCEVType() == scAddExpr)
        return getExprBase(SubExpr);

      if (SubExpr->getSCEVType() != scMulExpr)
        return SubExpr;
    }
    return S; // all operands are scaled, be conservative.
  }
  case scAddRecExpr:
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
  }
}
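// For example (illustrative): for an address expression such as
// ((4 * {0,+,1}<%L>) + %a), getExprBase skips the scaled operand and returns
// %a, so accesses like a[i] and b[i] get distinct bases and chains are not
// formed across different objects.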
/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as a chain increment as long as it's not obviously
/// expensive to expand using real instructions.
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {
  // Aggressively form chains when -stress-ivchain.
  if (StressIVChain)
    return true;

  // Do not replace a constant offset from IV head with a nonconstant IV
  // increment.
  if (!isa<SCEVConstant>(IncExpr)) {
    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
      return false;
  }

  SmallPtrSet<const SCEV*, 8> Processed;
  return !isHighCostExpansion(IncExpr, Processed, SE);
}
/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in another chain.
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction*> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  if (StressIVChain)
    return true;

  if (!Chain.hasIncs())
    return false;
3076if (!
Users.empty()) {
3077LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3079 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3082assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
  // The chain itself may require a register, so initialize cost to 1.
  int cost = 1;

  // A complete chain likely eliminates the need for keeping the original IV in
  // a register. LSR does not currently know how to form a complete chain unless
  // the header phi already exists.
  if (isa<PHINode>(Chain.tailUserInst())
3091 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3094constSCEV *LastIncExpr =
nullptr;
3095unsigned NumConstIncrements = 0;
3096unsigned NumVarIncrements = 0;
3097unsigned NumReusedIncrements = 0;
3102for (
const IVInc &Inc : Chain) {
3105if (Inc.IncExpr->isZero())
3108// Incrementing by zero or some constant is neutral. We assume constants can 3109// be folded into an addressing mode or an add's immediate operand. 3110if (isa<SCEVConstant>(Inc.IncExpr)) {
3111 ++NumConstIncrements;
3115if (Inc.IncExpr == LastIncExpr)
3116 ++NumReusedIncrements;
3120 LastIncExpr = Inc.IncExpr;
3122// An IV chain with a single increment is handled by LSR's postinc 3123// uses. However, a chain with multiple increments requires keeping the IV's 3124// value live longer than it needs to be if chained. 3125if (NumConstIncrements > 1)
3128// Materializing increment expressions in the preheader that didn't exist in 3129// the original code may cost a register. For example, sign-extended array 3130// indices can produce ridiculous increments like this: 3131// IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) 3132 cost += NumVarIncrements;
3134// Reusing variable increments likely saves a register to hold the multiple of 3136 cost -= NumReusedIncrements;
3138LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3144/// Add this IV user to an existing chain or make it the head of a new chain. 3147// When IVs are used as types of varying widths, they are generally converted 3148// to a wider type with some uses remaining narrow under a (free) trunc. 3153// Visit all existing chains. Check if its IVOper can be computed as a 3154// profitable loop invariant increment from the last link in the Chain. 3155unsigned ChainIdx = 0, NChains = IVChainVec.size();
3156constSCEV *LastIncExpr =
nullptr;
3157for (; ChainIdx < NChains; ++ChainIdx) {
3158 IVChain &Chain = IVChainVec[ChainIdx];
3160// Prune the solution space aggressively by checking that both IV operands 3161// are expressions that operate on the same unscaled SCEVUnknown. This 3162// "base" will be canceled by the subsequent getMinusSCEV call. Checking 3163// first avoids creating extra SCEV expressions. 3171// A phi node terminates a chain. 3172if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3175// The increment must be loop-invariant so it can be kept in a register. 3178if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
3181if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3182 LastIncExpr = IncExpr;
3186// If we haven't found a chain, create a new one, unless we hit the max. Don't 3187// bother for phi nodes, because they must be last in the chain. 3188if (ChainIdx == NChains) {
3189if (isa<PHINode>(UserInst))
3195 LastIncExpr = OperExpr;
3196// IVUsers may have skipped over sign/zero extensions. We don't currently 3197// attempt to form chains involving extensions unless they can be hoisted 3198// into this loop's AddRec. 3199if (!isa<SCEVAddRecExpr>(LastIncExpr))
3202 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3204 ChainUsersVec.
resize(NChains);
3205LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3206 <<
") IV=" << *LastIncExpr <<
"\n");
3208LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3209 <<
") IV+" << *LastIncExpr <<
"\n");
3210// Add this IV user to the end of the chain. 3211 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3213 IVChain &Chain = IVChainVec[ChainIdx];
3216// This chain's NearUsers become FarUsers. 3217if (!LastIncExpr->
isZero()) {
3218 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
  // All other uses of IVOperand become near uses of the chain.
  // We currently ignore intermediate values within SCEV expressions, assuming
  // they will eventually be used by the current chain, or can be computed
  // from one of the chain increments. To be more precise we could
  // transitively follow its user and only add leaf IV users to the set.
  //
  // Uses in the chain will no longer be uses if the chain is formed.
  // Include the head of the chain in this iteration (not Chain.begin()).
  IVChain::const_iterator IncIter = Chain.Incs.begin();
3235 IVChain::const_iterator IncEnd = Chain.Incs.end();
3236for( ; IncIter != IncEnd; ++IncIter) {
3237if (IncIter->UserInst == OtherUse)
3240if (IncIter != IncEnd)
3244 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3245 && IU.isIVUserOrOperand(OtherUse)) {
3248 NearUsers.
insert(OtherUse);
3251// Since this user is part of the chain, it's no longer considered a use 3253 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3256/// Populate the vector of Chains. 3258/// This decreases ILP at the architecture level. Targets with ample registers, 3259/// multiple memory ports, and no register renaming probably don't want 3260/// this. However, such targets should probably disable LSR altogether. 3262/// The job of LSR is to make a reasonable choice of induction variables across 3263/// the loop. Subsequent passes can easily "unchain" computation exposing more 3264/// ILP *within the loop* if the target wants it. 3266/// Finding the best IV chain is potentially a scheduling problem. Since LSR 3267/// will not reorder memory operations, it will recognize this as a chain, but 3268/// will generate redundant IV increments. Ideally this would be corrected later 3269/// by a smart scheduler: 3275/// TODO: Walk the entire domtree within this loop, not just the path to the 3276/// loop latch. This will discover chains on side paths, but requires 3277/// maintaining multiple copies of the Chains state. 3278void LSRInstance::CollectChains() {
3285 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3290// Walk the instruction stream from the loop header to the loop latch. 3293// Skip instructions that weren't seen by IVUsers analysis. 3294if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3297// Ignore users that are part of a SCEV expression. This way we only 3298// consider leaf IV Users. This effectively rediscovers a portion of 3299// IVUsers analysis but in program order this time. 3303// Remove this instruction from any NearUsers set it may be in. 3304for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3305 ChainIdx < NChains; ++ChainIdx) {
3306 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3308// Search for operands that can be chained. 3312while (IVOpIter != IVOpEnd) {
3313Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3314if (UniqueOperands.
insert(IVOpInst).second)
3315 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3316 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3318 }
// Continue walking down the instructions. 3319 }
// Continue walking down the domtree. 3320// Visit phi backedges to determine if the chain can generate the IV postinc. 3321for (
PHINode &PN :
L->getHeader()->phis()) {
3326 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3328 ChainInstruction(&PN, IncV, ChainUsersVec);
3330// Remove any unprofitable chains. 3331unsigned ChainIdx = 0;
3332for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3333 UsersIdx < NChains; ++UsersIdx) {
3335 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3337// Preserve the chain at UsesIdx. 3338if (ChainIdx != UsersIdx)
3339 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3340 FinalizeChain(IVChainVec[ChainIdx]);
3343 IVChainVec.resize(ChainIdx);
3346void LSRInstance::FinalizeChain(IVChain &Chain) {
3347assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3348LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3350for (
const IVInc &Inc : Chain) {
3352auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3353assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3354 IVIncSet.insert(UseI);
3358/// Return true if the IVInc can be folded into an addressing mode. 3361constSCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3362 Immediate IncOffset = Immediate::getZero();
3368// Look for mul(vscale, constant), to detect a scalable offset. 3369auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
3370if (!IncVScale || IncVScale->getNumOperands() != 2 ||
3371 !isa<SCEVVScale>(IncVScale->getOperand(1)))
3373auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
3374if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
3376 IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
3384 IncOffset,
/*HasBaseReg=*/false))
3390/// Generate an add or subtract for each IVInc in a chain to materialize the IV 3391/// user's operand from the previous IV user's operand. 3392void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3394// Find the new IVOperand for the head of the chain. It may have been replaced 3396const IVInc &Head = Chain.Incs[0];
3398// findIVOperand returns IVOpEnd if it can no longer find a valid IV user. 3401Value *IVSrc =
nullptr;
3402while (IVOpIter != IVOpEnd) {
3405// If this operand computes the expression that the chain needs, we may use 3406// it. (Check this after setting IVSrc which is used below.) 3408// Note that if Head.IncExpr is wider than IVSrc, then this phi is too 3409// narrow for the chain, so we can no longer use it. We do allow using a 3410// wider phi, assuming the LSR checked for free truncation. In that case we 3411// should already have a truncate on this operand such that 3412// getSCEV(IVSrc) == IncExpr. 3413if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3414 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3417 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3419if (IVOpIter == IVOpEnd) {
3420// Gracefully give up on this chain. 3421LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3424assert(IVSrc &&
"Failed to find IV chain source");
3429constSCEV *LeftOverExpr =
nullptr;
3434for (
const IVInc &Inc : Chain) {
3436if (isa<PHINode>(InsertPt))
3437 InsertPt =
L->getLoopLatch()->getTerminator();
3439// IVOper will replace the current IV User's operand. IVSrc is the IV 3440// value currently held in a register. 3441Value *IVOper = IVSrc;
3442if (!Inc.IncExpr->isZero()) {
3443// IncExpr was the result of subtraction of two narrow values, so must 3447 LeftOverExpr = LeftOverExpr ?
3448 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3451// Look through each base to see if any can produce a nice addressing mode. 3452bool FoundBase =
false;
3453for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3456if (!Remainder->
isZero()) {
3458Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3459constSCEV *IVOperExpr =
3461 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3470if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3471// Expand the IV increment. 3473Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3476 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3478// If an IV increment can't be folded, use it as the next IV value. 3480assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3483 LeftOverExpr =
nullptr;
3486Type *OperTy = Inc.IVOperand->getType();
3487if (IVTy != OperTy) {
3489"cannot extend a chained IV");
3491 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3493 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3494if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3497// If LSR created a new, wider phi, we may also replace its postinc. We only 3498// do this if we also found a wide value for the head of the chain. 3499if (isa<PHINode>(Chain.tailUserInst())) {
3500for (
PHINode &Phi :
L->getHeader()->phis()) {
3504Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3507Value *IVOper = IVSrc;
3509if (IVTy != PostIncTy) {
3511IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3512 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3513 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3515Phi.replaceUsesOfWith(PostIncV, IVOper);
3521void LSRInstance::CollectFixupsAndInitialFormulae() {
3523bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3525// For calculating baseline cost 3532// Skip IV users that are part of profitable IV Chains. 3535assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3536if (IVIncSet.count(UseI)) {
3537LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3541 LSRUse::KindType
Kind = LSRUse::Basic;
3542 MemAccessTy AccessTy;
3544Kind = LSRUse::Address;
3548constSCEV *S = IU.getExpr(U);
3553// Equality (== and !=) ICmps are special. We can rewrite (i == N) as 3554// (N - i == 0), and this allows (N - i) to be the expression that we work 3555// with rather than just N or i, so we can consider the register 3556// requirements for both N and i at the same time. Limiting this code to 3557// equality icmps is not a problem because all interesting loops use 3558// equality icmps, thanks to IndVarSimplify. 3559if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3560// If CI can be saved in some target, like replaced inside hardware loop 3561// in PowerPC, no need to generate initial formulae for it. 3562if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3564if (CI->isEquality()) {
3565// Swap the operands if needed to put the OperandValToReplace on the 3566// left, for consistency. 3568if (NV ==
U.getOperandValToReplace()) {
3569 CI->setOperand(1, CI->getOperand(0));
3570 CI->setOperand(0, NV);
3571NV = CI->getOperand(1);
3575// x == y --> x - y == 0 3578 (!
NV->getType()->isPointerTy() ||
3580// S is normalized, so normalize N before folding it into S 3581// to keep the result normalized. 3585Kind = LSRUse::ICmpZero;
3587 }
elseif (
L->isLoopInvariant(NV) &&
3588 (!isa<Instruction>(NV) ||
3589 DT.
dominates(cast<Instruction>(NV),
L->getHeader())) &&
3590 !
NV->getType()->isPointerTy()) {
3591// If we can't generally expand the expression (e.g. it contains 3592// a divide), but it is already at a loop invariant point before the 3593// loop, wrap it in an unknown (to prevent the expander from trying 3594// to re-expand in a potentially unsafe way.) The restriction to 3595// integer types is required because the unknown hides the base, and 3596// SCEV can't compute the difference of two unknown pointers. 3601Kind = LSRUse::ICmpZero;
3603assert(!isa<SCEVCouldNotCompute>(S));
3606// -1 and the negations of all interesting strides (except the negation 3607// of -1) are now also interesting. 3608for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3609if (Factors[i] != -1)
3610 Factors.insert(-(
uint64_t)Factors[i]);
3615// Get or create an LSRUse. 3616 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3617size_t LUIdx =
P.first;
3619 LSRUse &LU =
Uses[LUIdx];
3622 LSRFixup &LF = LU.getNewFixup();
3623 LF.UserInst = UserInst;
3624 LF.OperandValToReplace =
U.getOperandValToReplace();
3625 LF.PostIncLoops = TmpPostIncLoops;
3627 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3629// Create SCEV as Formula for calculating baseline cost 3630if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3632F.initialMatch(S, L, SE);
3633 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3634 VisitedLSRUse.
insert(LUIdx);
3637if (!LU.WidestFixupType ||
3640 LU.WidestFixupType = LF.OperandValToReplace->getType();
3642// If this is the first use of this LSRUse, give it a formula. 3643if (LU.Formulae.empty()) {
3644 InsertInitialFormula(S, LU, LUIdx);
3645 CountRegisters(LU.Formulae.back(), LUIdx);
3652/// Insert a formula for the given expression into the given use, separating out 3653/// loop-variant portions from loop-invariant and loop-computable portions. 3654void LSRInstance::InsertInitialFormula(
constSCEV *S, LSRUse &LU,
3656// Mark uses whose expressions cannot be expanded. 3658 LU.RigidFormula =
true;
3661F.initialMatch(S, L, SE);
3662boolInserted = InsertFormula(LU, LUIdx,
F);
3663assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3666/// Insert a simple single-register formula for the given expression into the 3669LSRInstance::InsertSupplementalFormula(
constSCEV *S,
3670 LSRUse &LU,
size_t LUIdx) {
3672F.BaseRegs.push_back(S);
3674boolInserted = InsertFormula(LU, LUIdx,
F);
3675assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3678/// Note which registers are used by the given formula, updating RegUses. 3679void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3681 RegUses.countRegister(
F.ScaledReg, LUIdx);
3682for (
constSCEV *BaseReg :
F.BaseRegs)
3683 RegUses.countRegister(BaseReg, LUIdx);
3686/// If the given formula has not yet been inserted, add it to the list, and 3687/// return true. Return false otherwise. 3688bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3689// Do not insert formula that we will not be able to expand. 3691"Formula is illegal");
3693if (!LU.InsertFormula(
F, *L))
3696 CountRegisters(
F, LUIdx);
3700/// Check for other uses of loop-invariant values which we're tracking. These 3701/// other uses will pin these values in registers, making them less profitable 3703/// TODO: This currently misses non-constant addrec step registers. 3704/// TODO: Should this give more weight to users inside the loop? 3706LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3710// Don't collect outside uses if we are favoring postinc - the instructions in 3711// the loop are more important than the ones outside of it. 3715while (!Worklist.
empty()) {
3718// Don't process the same SCEV twice 3719if (!Visited.
insert(S).second)
3729 }
elseif (
constSCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3730constValue *
V = US->getValue();
3731if (
constInstruction *Inst = dyn_cast<Instruction>(V)) {
3732// Look for instructions defined outside the loop. 3733if (
L->contains(Inst))
continue;
3734 }
elseif (isa<Constant>(V))
3735// Constants can be re-materialized. 3737for (
constUse &U :
V->uses()) {
3738constInstruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3739// Ignore non-instructions. 3742// Don't bother if the instruction is an EHPad. 3745// Ignore instructions in other functions (as can happen with 3747if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3749// Ignore instructions not dominated by the loop. 3750constBasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3752 cast<PHINode>(UserInst)->getIncomingBlock(
3756// Don't bother if the instruction is in a BB which ends in an EHPad. 3760// Ignore cases in which the currently-examined value could come from 3761// a basic block terminated with an EHPad. This checks all incoming 3762// blocks of the phi node since it is possible that the same incoming 3763// value comes from multiple basic blocks, only some of which may end 3764// in an EHPad. If any of them do, a subsequent rewrite attempt by this 3765// pass would try to insert instructions into an EHPad, hitting an 3767if (isa<PHINode>(UserInst)) {
3768constauto *PhiNode = cast<PHINode>(UserInst);
3769bool HasIncompatibleEHPTerminatedBlock =
false;
3771for (
unsignedintI = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3772if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3773if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3774 HasIncompatibleEHPTerminatedBlock =
true;
3779if (HasIncompatibleEHPTerminatedBlock) {
3784// Don't bother rewriting PHIs in catchswitch blocks. 3785if (isa<CatchSwitchInst>(UserInst->
getParent()->getTerminator()))
3787// Ignore uses which are part of other SCEV expressions, to avoid 3788// analyzing them multiple times. 3791// If the user is a no-op, look through to its uses. 3792if (!isa<SCEVUnknown>(UserS))
3800// Ignore icmp instructions which are already being analyzed. 3801if (
constICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3802unsigned OtherIdx = !
U.getOperandNo();
3803Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3808 std::pair<size_t, Immediate>
P =
3809 getUse(S, LSRUse::Basic, MemAccessTy());
3810size_t LUIdx =
P.first;
3812 LSRUse &LU =
Uses[LUIdx];
3813 LSRFixup &LF = LU.getNewFixup();
3815 LF.OperandValToReplace =
U;
3817 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3818if (!LU.WidestFixupType ||
3821 LU.WidestFixupType = LF.OperandValToReplace->getType();
3822 InsertSupplementalFormula(US, LU, LUIdx);
3823 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3830/// Split S into subexpressions which can be pulled out into separate 3831/// registers. If C is non-null, multiply each subexpression by C. 3833/// Return remainder expression after factoring the subexpressions captured by 3834/// Ops. If Ops is complete, return NULL. 3840// Arbitrarily cap recursion to protect compile time. 3845// Break out add operands. 3846for (
constSCEV *S :
Add->operands()) {
3852 }
elseif (
constSCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3853// Split a non-zero base out of an addrec. 3859// Split the non-zero AddRec unless it is part of a nested recurrence that 3860// does not pertain to this loop. 3861if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3871//FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 3875// Break (C * (a + b + c)) into C*a + C*b + C*c. 3881constSCEV *Remainder =
3891/// Return true if the SCEV represents a value that may end up as a 3892/// post-increment operation. 3894 LSRUse &LU,
constSCEV *S,
constLoop *L,
3896if (LU.Kind != LSRUse::Address ||
3897 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3903if (!isa<SCEVConstant>(LoopStep))
3905// Check if a post-indexed load/store can be used. 3915/// Helper function for LSRInstance::GenerateReassociations. 3916void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3921// Don't generate reassociations for the base register of a value that 3922// may generate a post-increment operator. The reason is that the 3923// reassociations cause extra base+register formula to be created, 3924// and possibly chosen, but the post-increment is more efficient. 3932if (AddOps.
size() == 1)
3938// Loop-variant "unknown" values are uninteresting; we won't be able to 3939// do anything meaningful with them. 3943// Don't pull a constant into a register if the constant could be folded 3944// into an immediate field. 3946 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3949// Collect all operands except *J. 3952 InnerAddOps.append(std::next(J),
3955// Don't leave just a constant behind in a register if the constant could 3956// be folded into an immediate field. 3957if (InnerAddOps.size() == 1 &&
3959 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3967if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3970// Add the remaining pieces of the add back into the new formula. 3971constSCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3976 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3979F.ScaledReg =
nullptr;
3982F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3983 }
elseif (IsScaledReg)
3984F.ScaledReg = InnerSum;
3986F.BaseRegs[
Idx] = InnerSum;
3988// Add J as its own register, or an unfolded immediate. 3992SC->getValue()->getZExtValue()))
3994 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3995SC->getValue()->getZExtValue());
3997F.BaseRegs.push_back(*J);
3998// We may have changed the number of register in base regs, adjust the 3999// formula accordingly. 4002if (InsertFormula(LU, LUIdx,
F))
4003// If that formula hadn't been seen before, recurse to find more like 4005// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2) 4006// Because just Depth is not enough to bound compile time. 4007// This means that every time AddOps.size() is greater 16^x we will add 4009 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4014/// Split out subexpressions from adds and the bases of addrecs. 4015void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4017assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4018// Arbitrarily cap recursion to protect compile time. 4022for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4023 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4026 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4027/* Idx */ -1,
/* IsScaledReg */true);
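// Illustrative example: a formula whose base register is the SCEV
// (%a + %b + 4) may be reassociated into a formula with base registers
// %a and (%b + 4), or with the constant 4 pulled into the unfolded offset,
// letting later filtering pick whichever form is cheapest for the target.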
4030/// Generate a formula consisting of all of the loop-dominating registers added 4031/// into a single register. 4032void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4034// This method is only interesting on a plurality of registers. 4035if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4036 (
Base.UnfoldedOffset.isNonZero()) <=
4040// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before 4041// processing the formula. 4044 Formula NewBase =
Base;
4045 NewBase.BaseRegs.clear();
4046Type *CombinedIntegerType =
nullptr;
4047for (
constSCEV *BaseReg :
Base.BaseRegs) {
4050if (!CombinedIntegerType)
4055 NewBase.BaseRegs.push_back(BaseReg);
4058// If no register is relevant, we're done. 4062// Utility function for generating the required variants of the combined 4064auto GenerateFormula = [&](
constSCEV *Sum) {
4067// TODO: If Sum is zero, it probably means ScalarEvolution missed an 4068// opportunity to fold something. For now, just ignore such cases 4069// rather than proceed with zero in a register. 4073F.BaseRegs.push_back(Sum);
4075 (void)InsertFormula(LU, LUIdx,
F);
4078// If we collected at least two registers, generate a formula combining them. 4079if (Ops.
size() > 1) {
4084// If we have an unfolded offset, generate a formula combining it with the 4085// registers collected. 4086if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4087assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4089 NewBase.UnfoldedOffset.getFixedValue(),
true));
4090 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4095/// Helper function for LSRInstance::GenerateSymbolicOffsets. 4096void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4101if (
G->isZero() || !GV)
4105if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4111 (void)InsertFormula(LU, LUIdx,
F);
4114/// Generate reuse formulae using symbolic offsets. 4115void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4117// We can't add a symbolic offset if the address already contains one. 4118if (
Base.BaseGV)
return;
4120for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4121 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4123 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base,
/* Idx */ -1,
4124/* IsScaledReg */true);
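// Illustrative example: a base register computing (@global + %x) can instead
// be represented with BaseGV = @global and a base register of %x, exposing
// addressing modes on targets that can fold the symbol directly.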
4127/// Helper function for LSRInstance::GenerateConstantOffsets. 4128void LSRInstance::GenerateConstantOffsetsImpl(
4129 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4132auto GenerateOffset = [&](
constSCEV *
G, Immediate
Offset) {
4134if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4138if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4139// Add the offset to the base register. 4140constSCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4142// If it cancelled out, drop the base register, otherwise update it. 4146F.ScaledReg =
nullptr;
4148F.deleteBaseReg(
F.BaseRegs[
Idx]);
4150 }
elseif (IsScaledReg)
4153F.BaseRegs[
Idx] = NewG;
4155 (void)InsertFormula(LU, LUIdx,
F);
  // With constant offsets and constant steps, we can generate pre-inc
  // accesses by having the offset equal the step. So, for access #0 with a
  // step of 8, we generate a G - 8 base which would require the first access
  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
  // for itself and hopefully becomes the base for other accesses. This means
  // that a single pre-indexed access can be generated to become the new base
  // pointer for each iteration of the loop, resulting in no extra add/sub
  // instructions for pointer updating.
  if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
4172 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
4173constAPInt &StepInt = StepRec->getAPInt();
4177for (Immediate
Offset : Worklist) {
4179Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4186for (Immediate
Offset : Worklist)
4190if (
G->isZero() ||
Imm.isZero() ||
4191 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4194F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4195if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4201// We may generate non canonical Formula if G is a recurrent expr reg 4202// related with current loop while F.ScaledReg is not. 4205 (void)InsertFormula(LU, LUIdx,
F);
4208/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. 4209void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
  // TODO: For now, just add the min and max offset, because it usually isn't
  // worthwhile looking at everything in between.
  if (LU.MaxOffset != LU.MinOffset)
4218for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4219 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4221 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist,
/* Idx */ -1,
4222/* IsScaledReg */true);
4225/// For ICmpZero, check to see if we can scale up the comparison. For example, x 4226/// == y -> x*c == y*c. 4227void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4229if (LU.Kind != LSRUse::ICmpZero)
return;
4231// Determine the integer type for the base formula. 4236// Don't do this if there is more than one offset. 4237if (LU.MinOffset != LU.MaxOffset)
return;
4239// Check if transformation is valid. It is illegal to multiply pointer. 4240if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4242for (
constSCEV *BaseReg :
Base.BaseRegs)
4245assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4247// Check each interesting stride. 4248for (int64_t Factor : Factors) {
4249// Check that Factor can be represented by IntTy 4252// Check that the multiplication doesn't overflow. 4253if (
Base.BaseOffset.isMin() && Factor == -1)
4255// Not supporting scalable immediates. 4256if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4258 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4259assert(Factor != 0 &&
"Zero factor not expected!");
4260if (NewBaseOffset.getFixedValue() / Factor !=
4261Base.BaseOffset.getFixedValue())
4263// If the offset will be truncated at this use, check that it is in bounds. 4268// Check that multiplying with the use offset doesn't overflow. 4269 Immediate
Offset = LU.MinOffset;
4270if (
Offset.isMin() && Factor == -1)
4273if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4275// If the offset will be truncated at this use, check that it is in bounds. 4281F.BaseOffset = NewBaseOffset;
4283// Check that this scale is legal. 4287// Compensate for the use having MinOffset built into it. 4288F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4292// Check that multiplying with each base register doesn't overflow. 4293for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4299// Check that multiplying with the scaled register doesn't overflow. 4306// Check that multiplying with the unfolded offset doesn't overflow. 4307if (
F.UnfoldedOffset.isNonZero()) {
4308if (
F.UnfoldedOffset.isMin() && Factor == -1)
4310F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4311if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4312Base.UnfoldedOffset.getFixedValue())
4314// If the offset will be truncated, check that it is in bounds. 4316 IntTy,
F.UnfoldedOffset.getFixedValue()))
4320// If we make it here and it's legal, add it. 4321 (void)InsertFormula(LU, LUIdx,
F);
4326/// Generate stride factor reuse formulae by making use of scaled-offset address 4327/// modes, for example. 4328void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4329// Determine the integer type for the base formula. 4333// If this Formula already has a scaled register, we can't add another one. 4334// Try to unscale the formula to generate a better scale. 4335if (
Base.Scale != 0 && !
Base.unscale())
4338assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4340// Check each interesting stride. 4341for (int64_t Factor : Factors) {
4343Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4344// Check whether this scale is going to be legal. 4345if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4347// As a special-case, handle special out-of-loop Basic users specially. 4348// TODO: Reconsider this special case. 4349if (LU.Kind == LSRUse::Basic &&
4351 LU.AccessTy,
Base) &&
4352 LU.AllFixupsOutsideLoop)
4353 LU.Kind = LSRUse::Special;
4357// For an ICmpZero, negating a solitary base register won't lead to 4359if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4360Base.BaseOffset.isZero() && !
Base.BaseGV)
4362// For each addrec base reg, if its loop is current loop, apply the scale. 4363for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4365if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4369// Divide out the factor, ignoring high bits, since we'll be 4370// scaling the value back up in the end. 4372if (!Quotient->isZero()) {
4373// TODO: This could be optimized to avoid all the copying. 4375F.ScaledReg = Quotient;
4376F.deleteBaseReg(
F.BaseRegs[i]);
4377// The canonical representation of 1*reg is reg, which is already in 4378// Base. In that case, do not try to insert the formula, it will be 4380if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4381 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4383// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate 4384// non canonical Formula with ScaledReg's loop not being L. 4385if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4387 (void)InsertFormula(LU, LUIdx,
F);
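// Illustrative example: with an interesting factor of 4, a formula that uses
// reg({0,+,4}<%L>) can be rewritten here as 4 * reg({0,+,1}<%L>); the scaled
// form is only kept when the target reports that a scale of 4 is legal for
// this kind of use.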
4394/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops. 4395/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then 4396/// perform the extension/truncate and normalize again, as the normalized form 4397/// can result in folds that are not valid in the post-inc use contexts. The 4398/// expressions for all PostIncLoopSets must match, otherwise return nullptr. 4403constSCEV *Result =
nullptr;
4404for (
auto &L :
Loops) {
4408if (!New || (Result && New != Result))
4413assert(Result &&
"failed to create expression");
4417/// Generate reuse formulae from different IV types. 4418void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4419// Don't bother truncating symbolic values. 4420if (
Base.BaseGV)
return;
4422// Determine the integer type for the base formula. 4428// It is invalid to extend a pointer type so exit early if ScaledReg or 4429// any of the BaseRegs are pointers. 4430if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4433 [](
constSCEV *S) { return S->getType()->isPointerTy(); }))
4437for (
auto &LF : LU.Fixups)
4438Loops.push_back(LF.PostIncLoops);
4440for (
Type *SrcTy : Types) {
4444// Sometimes SCEV is able to prove zero during ext transform. It may 4445// happen if SCEV did not do all possible transforms while creating the 4446// initial node (maybe due to depth limitations), but it can do them while 4449constSCEV *NewScaledReg =
4451if (!NewScaledReg || NewScaledReg->
isZero())
4453F.ScaledReg = NewScaledReg;
4455bool HasZeroBaseReg =
false;
4456for (
constSCEV *&BaseReg :
F.BaseRegs) {
4457constSCEV *NewBaseReg =
4459if (!NewBaseReg || NewBaseReg->
isZero()) {
4460 HasZeroBaseReg =
true;
4463 BaseReg = NewBaseReg;
4468// TODO: This assumes we've done basic processing on all uses and 4469// have an idea what the register usage is. 4470if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4474 (void)InsertFormula(LU, LUIdx,
F);
4481/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer 4482/// modifications so that the search phase doesn't have to worry about the data 4483/// structures moving underneath it. 4490 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4496}
// end anonymous namespace 4498#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4500OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4501 <<
" , add offset " <<
Imm;
4509/// Look for registers which are a constant distance apart and try to form reuse 4510/// opportunities between them. 4511void LSRInstance::GenerateCrossUseConstantOffsets() {
4512// Group the registers by their value without any added constant offset. 4513usingImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4518for (
constSCEV *
Use : RegUses) {
4519constSCEV *
Reg =
Use;
// Make a copy for ExtractImmediate to modify. 4521auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4524 Pair.first->second.insert(std::make_pair(Imm,
Use));
4525 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4528// Now examine each set of registers with the same base value. Build up 4529// a list of work to do and do the work in a separate step so that we're 4530// not adding formulae and register counts while we're searching. 4534for (
constSCEV *Reg : Sequence) {
4535const ImmMapTy &Imms =
Map.find(Reg)->second;
4537// It's not worthwhile looking for reuse if there's only one offset. 4538if (Imms.size() == 1)
4541LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4542for (
constauto &Entry
4544 <<
' ' <<
Entry.first;
4547// Examine each offset. 4548for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4550constSCEV *OrigReg = J->second;
4552 Immediate JImm = J->first;
4553constSmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4555if (!isa<SCEVConstant>(OrigReg) &&
4556 UsedByIndicesMap[Reg].
count() == 1) {
4562// Conservatively examine offsets between this orig reg a few selected 4564 Immediate
First = Imms.begin()->first;
4565 Immediate
Last = std::prev(Imms.end())->first;
4566if (!
First.isCompatibleImmediate(
Last)) {
4571// Only scalable if both terms are scalable, or if one is scalable and 4573bool Scalable =
First.isScalable() ||
Last.isScalable();
4574 int64_t FI =
First.getKnownMinValue();
4575 int64_t LI =
Last.getKnownMinValue();
      // Compute (First + Last) / 2 without overflow using the fact that
      // First + Last = 2 * (First & Last) + (First ^ Last).
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4579// If the result is negative and FI is odd and LI even (or vice versa), 4580// we rounded towards -inf. Add 1 in that case, to round towards 0. 4581 Avg = Avg + ((FI ^ LI) & ((
uint64_t)Avg >> 63));
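      // Worked example: FI = 7, LI = 4 gives (7 & 4) + ((7 ^ 4) >> 1)
      // = 4 + 1 = 5 = (7 + 4) / 2, without ever forming FI + LI and so
      // without any risk of signed overflow.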
4582 ImmMapTy::const_iterator OtherImms[] = {
4583 Imms.begin(), std::prev(Imms.end()),
4584 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4585for (
constauto &M : OtherImms) {
4586if (M == J || M == JE)
continue;
4587if (!JImm.isCompatibleImmediate(
M->first))
4590// Compute the difference between the two. 4591 Immediate
Imm = JImm.subUnsigned(
M->first);
4592for (
unsigned LUIdx : UsedByIndices.
set_bits())
4593// Make a memo of this use, offset, and register tuple. 4594if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4602 UsedByIndicesMap.
clear();
4603 UniqueItems.
clear();
4605// Now iterate through the worklist and add new formulae. 4606for (
constWorkItem &WI : WorkItems) {
4607size_t LUIdx = WI.LUIdx;
4608 LSRUse &LU =
Uses[LUIdx];
4609 Immediate
Imm = WI.Imm;
4610constSCEV *OrigReg = WI.OrigReg;
4613constSCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4616// TODO: Use a more targeted data structure. 4617for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4618 Formula
F = LU.Formulae[
L];
4619// FIXME: The code for the scaled and unscaled registers looks 4620// very similar but slightly different. Investigate if they 4621// could be merged. That way, we would not have to unscale the 4624// Use the immediate in the scaled register. 4625if (
F.ScaledReg == OrigReg) {
4626if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4628 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4629// Don't create 50 + reg(-50). 4630constSCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4631if (
F.referencesReg(S))
4635if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4638 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4640// If the new scale is a constant in a register, and adding the constant 4641// value to the immediate would produce a value closer to zero than the 4642// immediate itself, then the formula isn't worthwhile. 4643if (
constSCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
4644// FIXME: Do we need to do something for scalable immediates here? 4645// A scalable SCEV won't be constant, but we might still have 4646// something in the offset? Bail out for now to be safe. 4647if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4649if (
C->getValue()->isNegative() !=
4650 (NewF.BaseOffset.isLessThanZero()) &&
4652 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4657 NewF.canonicalize(*this->L);
4658 (void)InsertFormula(LU, LUIdx, NewF);
4660// Use the immediate in a base register. 4661for (
size_tN = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4662constSCEV *BaseReg =
F.BaseRegs[
N];
4663if (BaseReg != OrigReg)
4666if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4667 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4668 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4670 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4672 LU.Kind, LU.AccessTy, NewF)) {
4676 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4680 NewF.UnfoldedOffset = NewUnfoldedOffset;
4682 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4684// If the new formula has a constant in a register, and adding the 4685// constant value to the immediate would produce a value closer to 4686// zero than the immediate itself, then the formula isn't worthwhile. 4687for (
constSCEV *NewReg : NewF.BaseRegs)
4689if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4691if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4693 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4694 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4696 (
unsigned)llvm::countr_zero<uint64_t>(
4697 NewF.BaseOffset.getFixedValue()))
4702 NewF.canonicalize(*this->L);
4703 (void)InsertFormula(LU, LUIdx, NewF);
4712/// Generate formulae for each use. 4714LSRInstance::GenerateAllReuseFormulae() {
4715// This is split into multiple loops so that hasRegsUsedByUsesOtherThan 4716// queries are more precise. 4717for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4718 LSRUse &LU =
Uses[LUIdx];
4719for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4720 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4721for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4722 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4724for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4725 LSRUse &LU =
Uses[LUIdx];
4726for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4727 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4728for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4729 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4730for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4731 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4732for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4733 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4735for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4736 LSRUse &LU =
Uses[LUIdx];
4737for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4738 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4741 GenerateCrossUseConstantOffsets();
4744"After generating reuse formulae:\n";
4745 print_uses(
dbgs()));
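// Illustrative sketch (not part of LSR itself; the exact output depends on the
// target and on TTI): for a single address use whose SCEV is {A,+,4}<%L>, the
// generators above might produce formulae along these lines:
//   reg({A,+,4})                      -- the initial formula
//   reg(A) + 4*reg({0,+,1})           -- reassociation / GenerateScales
//   reg(A-4) + 4*reg({1,+,1})         -- GenerateConstantOffsets
// GenerateCrossUseConstantOffsets then tries to share registers between uses
// whose expressions differ only by a constant.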
4748/// If there are multiple formulae with the same set of registers used 4749/// by other uses, pick the best one and delete the others. 4750void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4755bool ChangedFormulae =
false;
4758// Collect the best formula for each unique set of shared registers. This 4759// is reset for each use. 4760usingBestFormulaeTy =
4763 BestFormulaeTy BestFormulae;
4765for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4766 LSRUse &LU =
Uses[LUIdx];
4771for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4772 FIdx != NumForms; ++FIdx) {
4773 Formula &
F = LU.Formulae[FIdx];
4775// Some formulas are instant losers. For example, they may depend on 4776// nonexistent AddRecs from other loops. These need to be filtered 4777// immediately, otherwise heuristics could choose them over others leading 4778// to an unsatisfactory solution. Passing LoserRegs into RateFormula here 4779// avoids the need to recompute this information across formulae using the 4780// same bad AddRec. Passing LoserRegs is also essential unless we remove 4781// the corresponding bad register from the Regs set. 4784 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4785if (CostF.isLoser()) {
4786// During initial formula generation, undesirable formulae are generated 4787// by uses within other loops that have some non-trivial address mode or 4788// use the postinc form of the IV. LSR needs to provide these formulae 4789// as the basis of rediscovering the desired formula that uses an AddRec 4790// corresponding to the existing phi. Once all formulae have been 4791// generated, these initial losers may be pruned. 4797for (
constSCEV *Reg :
F.BaseRegs) {
4798if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4802 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4803Key.push_back(
F.ScaledReg);
4804// Unstable sort by host order ok, because this is only used for 4808 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4809 BestFormulae.insert(std::make_pair(Key, FIdx));
4813 Formula &Best = LU.Formulae[
P.first->second];
4817 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4818if (CostF.isLess(CostBest))
4822" in favor of formula ";
4826 ChangedFormulae =
true;
4828 LU.DeleteFormula(
F);
4834// Now that we've filtered out some formulae, recompute the Regs set. 4836 LU.RecomputeRegs(LUIdx, RegUses);
4838// Reset this to prepare for the next use. 4839 BestFormulae.clear();
4844"After filtering out undesirable candidates:\n";
4849/// Estimate the worst-case number of solutions the solver might have to 4850/// consider. It almost never considers this many solutions because it prunes the 4851/// search space, but the pruning isn't always sufficient. 4852size_t LSRInstance::EstimateSearchSpaceComplexity()
const{
4854for (
const LSRUse &LU :
Uses) {
4855size_t FSize = LU.Formulae.size();
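// A minimal sketch of the estimate computed above, assuming hypothetical
// per-use formula counts (illustrative only): the worst case is the product of
// the counts, saturated so it cannot overflow. For counts {3, 4, 2} the
// estimate would be 3 * 4 * 2 = 24 candidate solutions.
//
//   // uint64_t estimateWorstCase(const SmallVectorImpl<size_t> &Counts) {
//   //   uint64_t Product = 1;
//   //   for (size_t N : Counts) {
//   //     if (N != 0 && Product > UINT32_MAX / N)
//   //       return UINT32_MAX;          // saturate rather than overflow
//   //     Product *= N;
//   //   }
//   //   return Product;
//   // }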
4867/// When one formula uses a superset of the registers of another formula, it 4868/// won't help reduce register pressure (though it may not necessarily hurt 4869/// register pressure); remove it to simplify the system. 4870void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4874LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae " 4875"which use a superset of registers used by other " 4878for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4879 LSRUse &LU =
Uses[LUIdx];
4881for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4882 Formula &
F = LU.Formulae[i];
4883if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4885// Look for a formula with a constant or GV in a register. If the use 4886// also has a formula with that same value in an immediate field, 4887// delete the one that uses a register. 4889I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4892//FIXME: Formulas should store bitwidth to do wrapping properly. 4895 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4896 (
uint64_t)
C->getValue()->getSExtValue());
4897 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4898 (
I -
F.BaseRegs.begin()));
4899if (LU.HasFormulaWithSameRegs(NewF)) {
4902 LU.DeleteFormula(
F);
4908 }
elseif (
constSCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4909if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4913 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4914 (
I -
F.BaseRegs.begin()));
4915if (LU.HasFormulaWithSameRegs(NewF)) {
4918 LU.DeleteFormula(
F);
4929 LU.RecomputeRegs(LUIdx, RegUses);
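// Worked example for the superset elimination above (illustrative only): if a
// use holds both
//   reg(A) + reg(5)              -- the constant 5 kept in its own register
//   reg(A) with BaseOffset = 5   -- the same value folded into the immediate
// then the first formula uses a strict superset of the second one's registers,
// so it is deleted, provided the immediate form is legal for this use.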
4936/// When there are many registers for expressions like A, A+1, A+2, etc., 4937/// allocate a single register for them. 4938void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4943dbgs() <<
"The search space is too complex.\n" 4944"Narrowing the search space by assuming that uses separated " 4945"by a constant offset will use the same registers.\n");
4947// This is especially useful for unrolled loops. 4949for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4950 LSRUse &LU =
Uses[LUIdx];
4951for (
const Formula &
F : LU.Formulae) {
4952if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4955 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4959if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
/*HasBaseReg=*/false,
4960 LU.Kind, LU.AccessTy))
4965 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4967// Transfer the fixups of LU to LUThatHas. 4968for (LSRFixup &
Fixup : LU.Fixups) {
4969Fixup.Offset +=
F.BaseOffset;
4970 LUThatHas->pushFixup(
Fixup);
4974// Delete formulae from the new use which are no longer legal. 4976for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4977 Formula &
F = LUThatHas->Formulae[i];
4978if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4979 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4981 LUThatHas->DeleteFormula(
F);
4989 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4991// Delete the old use. 4992 DeleteUse(LU, LUIdx);
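// Worked example for the collapsing above (illustrative only): after unrolling
// by four, the loads at A, A+4, A+8 and A+12 become four separate uses. Rather
// than keeping a register per address, the uses are folded into the one whose
// formula has a matching base, and the constant differences are carried in the
// fixup offsets, so a single register (plus small immediates) serves all four.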
5002/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that 5003/// we've done more filtering, as it may be able to find more formulae to eliminate. 5005void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5009LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out " 5010"undesirable dedicated registers.\n");
5012 FilterOutUndesirableDedicatedRegisters();
5018/// If an LSRUse has multiple formulae with the same ScaledReg and Scale, 5019/// pick the best one and delete the others. 5020/// This narrowing heuristic keeps as many formulae with different 5021/// Scale and ScaledReg pairs as possible while narrowing the search space. 5022/// The benefit is that it is more likely to find a better solution 5023/// in a formulae set with more Scale and ScaledReg variations than 5024/// in one where they are all the same. The winner-reg-picking 5025/// heuristic will often keep the formulae with the same Scale and 5026/// ScaledReg and filter out the others, and we want to avoid that if possible. 5027void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5032dbgs() <<
"The search space is too complex.\n" 5033"Narrowing the search space by choosing the best Formula " 5034"from the Formulae with the same Scale and ScaledReg.\n");
5036// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. 5039 BestFormulaeTy BestFormulae;
5041bool ChangedFormulae =
false;
5046for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5047 LSRUse &LU =
Uses[LUIdx];
5051// Return true if Formula FA is better than Formula FB. 5052auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5053// First we will try to choose the Formula with fewer new registers. 5054// For a register used by current Formula, the more the register is 5055// shared among LSRUses, the less we increase the register number 5056// counter of the formula. 5058for (
constSCEV *Reg : FA.BaseRegs) {
5059constSmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5060 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5063for (
constSCEV *Reg : FB.BaseRegs) {
5064constSmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5065 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5067if (FARegNum != FBRegNum)
5068return FARegNum < FBRegNum;
5070// If the new register numbers are the same, choose the Formula with 5075 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
5077 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
5078return CostFA.isLess(CostFB);
5082for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5084 Formula &
F = LU.Formulae[FIdx];
5087autoP = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5091 Formula &Best = LU.Formulae[
P.first->second];
5092if (IsBetterThan(
F, Best))
5096" in favor of formula ";
5099 ChangedFormulae =
true;
5101 LU.DeleteFormula(
F);
5107 LU.RecomputeRegs(LUIdx, RegUses);
5109// Reset this to prepare for the next use. 5110 BestFormulae.clear();
5115"After filtering out undesirable candidates:\n";
5120/// If we are over the complexity limit, filter out any post-inc preferring 5121/// variables to use only post-inc values. 5122void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5129"Narrowing the search space by choosing the lowest " 5130"register Formula for PostInc Uses.\n");
5132for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5133 LSRUse &LU =
Uses[LUIdx];
5135if (LU.Kind != LSRUse::Address)
5141size_t MinRegs = std::numeric_limits<size_t>::max();
5142for (
const Formula &
F : LU.Formulae)
5143 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5146for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5148 Formula &
F = LU.Formulae[FIdx];
5149if (
F.getNumRegs() > MinRegs) {
5152 LU.DeleteFormula(
F);
5159 LU.RecomputeRegs(LUIdx, RegUses);
5168/// This function deletes formulae with a high expected register count. 5169/// Assuming we don't know the value of each formula (we have already deleted 5170/// all inefficient ones), compute the probability of each register not being selected 5174/// reg(a) + reg({0,+,1}) 5175/// reg(a) + reg({-1,+,1}) + 1 5178/// reg(b) + reg({0,+,1}) 5179/// reg(b) + reg({-1,+,1}) + 1 5182/// reg(c) + reg(b) + reg({0,+,1}) 5183/// reg(c) + reg({b,+,1}) 5185/// Probability of not selecting 5187/// reg(a) (1/3) * 1 * 1 5188/// reg(b) 1 * (1/3) * (1/2) 5189/// reg({0,+,1}) (2/3) * (2/3) * (1/2) 5190/// reg({-1,+,1}) (2/3) * (2/3) * 1 5191/// reg({a,+,1}) (2/3) * 1 * 1 5192/// reg({b,+,1}) 1 * (2/3) * (2/3) 5195/// Now compute the expected register count for each formula. 5196/// Note that for each use we exclude the probability of not selecting for that use. 5197/// For example, for Use1 the probability for reg(a) would be just 1 * 1 (excluding 5198/// the probability 1/3 of not selecting for Use1). 5200/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted 5201/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted 5204/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted 5205/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted 5208/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted 5209/// reg(c) + reg({b,+,1}) 1 + 2/3 5210void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5213// Ok, we have too many formulae on our hands to conveniently handle. 5214// Use a rough heuristic to thin out the list. 5216// Set of Regs which will be 100% used in the final solution. 5217// Used in each formula of a solution (in the example above this is reg(c)). 5218// We can skip them in calculations. 5222// Map each register to the probability of it not being selected. 5223 DenseMap<const SCEV *, float> RegNumMap;
5224for (
constSCEV *Reg : RegUses) {
5225if (UniqRegs.
count(Reg))
5228for (
const LSRUse &LU :
Uses) {
5229if (!LU.Regs.count(Reg))
5231floatP = LU.getNotSelectedProbability(Reg);
5237 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
5241dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5243// Delete formulas where registers number expectation is high. 5244for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5245 LSRUse &LU =
Uses[LUIdx];
5246// If nothing to delete - continue. 5247if (LU.Formulae.size() < 2)
5249// This is a temporary solution to test performance. Float should be 5250// replaced with a rounding-independent type (based on integers) to avoid 5251// different results for different target builds. 5252float FMinRegNum = LU.Formulae[0].getNumRegs();
5253float FMinARegNum = LU.Formulae[0].getNumRegs();
5255for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5256 Formula &
F = LU.Formulae[i];
5259for (
constSCEV *BaseReg :
F.BaseRegs) {
5260if (UniqRegs.
count(BaseReg))
5262 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5263if (isa<SCEVAddRecExpr>(BaseReg))
5265 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5267if (
constSCEV *ScaledReg =
F.ScaledReg) {
5268if (!UniqRegs.
count(ScaledReg)) {
5270 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5271if (isa<SCEVAddRecExpr>(ScaledReg))
5273 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5276if (FMinRegNum > FRegNum ||
5277 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5278 FMinRegNum = FRegNum;
5279 FMinARegNum = FARegNum;
5284dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5286std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5287while (LU.Formulae.size() != 1) {
5290 LU.Formulae.pop_back();
5292 LU.RecomputeRegs(LUIdx, RegUses);
5293assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5294 Formula &
F = LU.Formulae[0];
5296// When we choose the formula, the regs become unique. 5297 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
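// Standalone sketch of the expectation arithmetic used above, with hypothetical
// names and plain floats instead of LSR's data structures (illustrative only):
//
//   // NotSel[R][U]: probability that register R is NOT selected for use U
//   // (taken as 1.0 when use U does not reference R at all). Then
//   //   PNotSel(R) = product over all uses V of NotSel[R][V]
//   //   E(R, U)    = PNotSel(R) / NotSel[R][U]
//   // and a formula's expected register count is the sum of E(R, U) over its
//   // registers. Per use, only the formula with the smallest expectation is
//   // kept; the registers of that formula then become "unique" and are skipped
//   // in later iterations.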
5304// Check if Best and Reg are SCEVs separated by a constant amount C, and if so 5305// whether the addressing offset +C would be legal where the negative offset -C is not. 5310 MemAccessTy AccessType) {
5311if (Best->
getType() != Reg->getType() ||
5312 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5313 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5314 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5321 AccessType.MemTy,
/*BaseGV=*/nullptr,
5322/*BaseOffset=*/Diff->getSExtValue(),
5323/*HasBaseReg=*/true,
/*Scale=*/0, AccessType.AddrSpace) &&
5325 AccessType.MemTy,
/*BaseGV=*/nullptr,
5326/*BaseOffset=*/-Diff->getSExtValue(),
5327/*HasBaseReg=*/true,
/*Scale=*/0, AccessType.AddrSpace);
5330/// Pick a register which seems likely to be profitable, and then in any use 5331/// which has any reference to that register, delete all formulae which do not 5332/// reference that register. 5333void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5334// With all other options exhausted, loop until the system is simple enough to handle. 5338// Ok, we have too many formulae on our hands to conveniently handle. 5339// Use a rough heuristic to thin out the list. 5342// Pick the register which is used by the most LSRUses, which is likely 5343// to be a good reuse register candidate. 5344const SCEV *Best =
nullptr;
5345unsigned BestNum = 0;
5346for (
constSCEV *Reg : RegUses) {
5347if (Taken.
count(Reg))
5351 BestNum = RegUses.getUsedByIndices(Reg).count();
5353unsigned Count = RegUses.getUsedByIndices(Reg).count();
5354if (Count > BestNum) {
5359// If the scores are the same, but the Reg is simpler for the target 5360// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can 5361// handle +C but not -C), opt for the simpler formula. 5362if (Count == BestNum) {
5363int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5364if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5366Uses[LUIdx].AccessTy)) {
5373assert(Best &&
"Failed to find best LSRUse candidate");
5375LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5376 <<
" will yield profitable reuse.\n");
5379// In any use with formulae which references this register, delete formulae 5380// which don't reference it. 5381for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5382 LSRUse &LU =
Uses[LUIdx];
5383if (!LU.Regs.count(Best))
continue;
5386for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5387 Formula &
F = LU.Formulae[i];
5388if (!
F.referencesReg(Best)) {
5390 LU.DeleteFormula(
F);
5394assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5400 LU.RecomputeRegs(LUIdx, RegUses);
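// Worked example for the winner-register heuristic above (illustrative only):
// if reg(A) is referenced by 7 of 10 uses and no other register is referenced
// by more, reg(A) is taken as the winner; in each of those 7 uses every
// formula that does not reference reg(A) is deleted, and the process repeats
// until the search space estimate drops below the complexity limit.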
5407/// If there are an extraordinary number of formulae to choose from, use some 5408/// rough heuristics to prune down the number of formulae. This keeps the main 5409/// solver from taking an extraordinary amount of time in some worst-case scenarios. 5411void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5412 NarrowSearchSpaceByDetectingSupersets();
5413 NarrowSearchSpaceByCollapsingUnrolledCode();
5414 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5416 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5417 NarrowSearchSpaceByFilterPostInc();
5419 NarrowSearchSpaceByDeletingCostlyFormulas();
5421 NarrowSearchSpaceByPickingWinnerRegs();
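// With the search space narrowed, the recursive solver below performs a
// depth-first branch-and-bound over one formula choice per use. A minimal
// sketch of that shape (illustrative only; the names and types are
// hypothetical, not LSR's actual ones):
//
//   // void Solve(unsigned UseIdx, Cost Cur, State &S) {
//   //   if (UseIdx == NumUses) { S.recordIfBetter(Cur); return; }
//   //   for (const Formula &F : Formulae[UseIdx]) {
//   //     if (!usesAllRequiredRegs(F, S))   // prune: must reuse in-progress regs
//   //       continue;
//   //     Cost Next = Cur + rate(F, S);
//   //     if (Next < S.BestCost)            // prune: already worse than best
//   //       Solve(UseIdx + 1, Next, S);
//   //   }
//   // }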
5424/// This is the recursive solver. 5433// - use more aggressive filtering 5434// - sort the formula so that the most profitable solutions are found first 5435// - sort the uses too 5437// - don't compute a cost, and then compare. compare while computing a cost 5439// - track register sets with SmallBitVector 5441const LSRUse &LU =
Uses[Workspace.
size()];
5443// If this use references any register that's already a part of the 5444// in-progress solution, consider it a requirement that a formula must 5445// reference that register in order to be considered. This prunes out 5446// unprofitable searching. 5448for (
constSCEV *S : CurRegs)
5449if (LU.Regs.count(S))
5454for (
const Formula &
F : LU.Formulae) {
5455// Ignore formulae which may not be ideal in terms of register reuse of 5456// ReqRegs. The formula should use all required registers before 5457// introducing new ones. 5458// This can sometimes (notably when trying to favour postinc) lead to 5459// sub-optimal decisions. There it is best left to the cost modelling to undo this. 5462int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5463for (
constSCEV *Reg : ReqRegs) {
5464if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5467if (NumReqRegsToFind == 0)
5471if (NumReqRegsToFind != 0) {
5472// If none of the formulae satisfied the required registers, then we could 5473// clear ReqRegs and try again. Currently, we simply give up in this case. 5478// Evaluate the cost of the current formula. If it's already worse than 5479// the current best, prune the search at that point. 5482 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5483if (NewCost.isLess(SolutionCost)) {
5485if (Workspace.
size() !=
Uses.size()) {
5486 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5487 NewRegs, VisitedRegs);
5488if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5489 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5492dbgs() <<
".\nRegs:\n";
5494 <<
"- " << *S <<
"\n";
5497 SolutionCost = NewCost;
5498 Solution = Workspace;
5505/// Choose one formula from each use. Return the results in the given Solution 5509Cost SolutionCost(L, SE,
TTI, AMK);
5510 SolutionCost.Lose();
5516// SolveRecurse does all the work. 5517 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5518 CurRegs, VisitedRegs);
5519if (Solution.
empty()) {
5524// Ok, we've now made all our decisions. 5526"The chosen solution requires ";
5527 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5528for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5533 Solution[i]->print(
dbgs());
5539constbool EnableDropUnprofitableSolution = [&] {
5551if (BaselineCost.isLess(SolutionCost)) {
5552if (!EnableDropUnprofitableSolution)
5554dbgs() <<
"Baseline is more profitable than chosen solution, " 5555"add option 'lsr-drop-solution' to drop LSR solution.\n");
5558"solution, dropping LSR solution.\n";);
5564/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far as 5565/// we can go while still being dominated by the input positions. This helps 5566/// canonicalize the insert position, which encourages sharing. 5573bool AllDominate =
true;
5575// Don't bother attempting to insert before a catchswitch, their basic block 5576// cannot have other non-PHI instructions. 5577if (isa<CatchSwitchInst>(Tentative))
5581if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5585// Attempt to find an insert position in the middle of the block, 5586// instead of at the end, so that it can be used for other expansions. 5587if (Tentative->
getParent() == Inst->getParent() &&
5588 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5598constLoop *IPLoop = LI.getLoopFor(IP->getParent());
5599unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5604 Rung = Rung->getIDom();
5606 IDom = Rung->getBlock();
5608// Don't climb into a loop though. 5609constLoop *IDomLoop = LI.getLoopFor(IDom);
5610unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5611if (IDomDepth <= IPLoopDepth &&
5612 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5622/// Determine an input position which will be dominated by the operands and 5623/// which will dominate the result. 5626// Collect some instructions which must be dominated by the 5627// expanding replacement. These must be dominated by any operands that 5628// will be required in the expansion. 5630if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5632if (LU.Kind == LSRUse::ICmpZero)
5634 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5636if (LF.PostIncLoops.count(L)) {
5637if (LF.isUseFullyOutsideLoop(L))
5638 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5642// The expansion must also be dominated by the increment positions of any 5643// loops for which it is using post-inc mode. 5644for (
constLoop *PIL : LF.PostIncLoops) {
5645if (PIL == L)
continue;
5647// Be dominated by the loop exit. 5650if (!ExitingBlocks.
empty()) {
5652for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5658assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5659 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5660"Insertion point must be a normal instruction");
5662// Then, climb up the immediate dominator tree as far as we can go while 5663// still being dominated by the input positions. 5666// Don't insert instructions before PHI nodes. 5667while (isa<PHINode>(IP)) ++IP;
5669// Ignore landingpad instructions. 5670while (IP->isEHPad()) ++IP;
5672// Ignore debug intrinsics. 5673while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5675// Set IP below instructions recently inserted by SCEVExpander. This keeps the 5676// IP consistent across expansions and allows the previously inserted 5677// instructions to be reused by subsequent expansion. 5678while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5684/// Emit instructions for the leading candidate expression for this LSRUse (this 5685/// is called "expanding"). 5686Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5690return LF.OperandValToReplace;
5692// Determine an input position which will be dominated by the operands and 5693// which will dominate the result. 5694 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5697// Inform the Rewriter if we have a post-increment use, so that it can 5698// perform an advantageous expansion. 5699Rewriter.setPostInc(LF.PostIncLoops);
5701// This is the type that the user actually needs. 5702Type *OpTy = LF.OperandValToReplace->getType();
5703// This will be the type that we'll initially expand to. 5704Type *Ty =
F.getType();
5706// No type known; just expand directly to the ultimate type. 5709// Expand directly to the ultimate type if it's the right size. 5711// This is the type to do integer arithmetic in. 5714// Build up a list of operands to add together to form the full base. 5717// Expand the BaseRegs portion. 5718for (
constSCEV *Reg :
F.BaseRegs) {
5719assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5721// If we're expanding for a post-inc user, make the post-inc adjustment. 5726// Expand the ScaledReg portion. 5727Value *ICmpScaledV =
nullptr;
5729constSCEV *ScaledS =
F.ScaledReg;
5731// If we're expanding for a post-inc user, make the post-inc adjustment. 5735if (LU.Kind == LSRUse::ICmpZero) {
5736// Expand ScaleReg as if it was part of the base regs. 5741// An interesting way of "folding" with an icmp is to use a negated 5742// scale, which we'll implement by inserting it into the other operand 5745"The only scale supported by ICmpZero uses is -1!");
5746 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5749// Otherwise just expand the scaled register and an explicit scale, 5750// which is expected to be matched as part of the address. 5752// Flush the operand list to suppress SCEVExpander hoisting address modes. 5753// Unless the addressing mode will not be folded. 5754if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5768// Expand the GV portion. 5770// Flush the operand list to suppress SCEVExpander hoisting. 5779// Flush the operand list to suppress SCEVExpander hoisting of both folded and 5780// unfolded offsets. LSR assumes they both live next to their uses. 5787// FIXME: Are we sure we won't get a mismatch here? Is there a way to bail 5788// out at this point, or should we generate a SCEV adding together mixed 5790assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5791"Expanding mismatched offsets\n");
5792// Expand the immediate portion. 5793 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5795if (LU.Kind == LSRUse::ICmpZero) {
5796// The other interesting way of "folding" with an ICmpZero is to use a 5797// negated immediate. 5803 ICmpScaledV = ConstantInt::get(IntTy,
Offset.getFixedValue());
5806// Just add the immediate values. These again are expected to be matched 5807// as part of the address. 5812// Expand the unfolded offset portion. 5813 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5814if (UnfoldedOffset.isNonZero()) {
5815// Just add the immediate values. 5816 Ops.
push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5819// Emit instructions summing all the operands. 5825// We're done expanding now, so reset the rewriter. 5828// An ICmpZero Formula represents an ICmp which we're handling as a 5829// comparison against zero. Now that we've expanded an expression for that 5830// form, update the ICmp's other operand. 5831if (LU.Kind == LSRUse::ICmpZero) {
5832ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5833if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5835assert(!
F.BaseGV &&
"ICmp does not support folding a global value and " 5836"a scale at the same time!");
5838if (ICmpScaledV->
getType() != OpTy) {
5846// A scale of 1 means that the scale has been expanded as part of the 5848assert((
F.Scale == 0 ||
F.Scale == 1) &&
5849"ICmp does not support folding a global value and " 5850"a scale at the same time!");
5853if (
C->getType() != OpTy) {
5857assert(
C &&
"Cast of ConstantInt should have folded");
5867/// Helper for Rewrite. PHI nodes are special because the use of their operands 5868/// effectively happens in their predecessor blocks, so the expression may need 5869/// to be expanded in multiple places. 5870void LSRInstance::RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
5871const LSRFixup &LF,
const Formula &
F,
5877bool needUpdateFixups =
false;
5880// If this is a critical edge, split the edge so that we do not insert 5881// the code on all predecessor/successor paths. We do this unless this 5882// is the canonical backedge for this loop, which complicates post-inc 5888Loop *PNLoop = LI.getLoopFor(Parent);
5889if (!PNLoop || Parent != PNLoop->
getHeader()) {
5890// Split the critical edge. 5896 .setMergeIdenticalEdges()
5897 .setKeepOneInputPHIs());
5904// If NewBB==NULL, then SplitCriticalEdge refused to split because all 5905// phi predecessors are identical. The simple thing to do is skip 5906// splitting in this case rather than complicate the API. 5908// If PN is outside of the loop and BB is in the loop, we want to 5909// move the block to be immediately before the PHI block, not 5910// immediately after BB. 5911if (
L->contains(BB) && !
L->contains(PN))
5914// Splitting the edge can reduce the number of PHI entries we have. 5919 needUpdateFixups =
true;
5924 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5925Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5932// If this is reuse-by-noop-cast, insert the noop cast. 5933Type *OpTy = LF.OperandValToReplace->getType();
5937 LF.OperandValToReplace->getType(),
"tmp",
5940// If the incoming block for this value is not in the loop, it means the 5941// current PHI is not in a loop exit, so we must create a LCSSA PHI for 5942// the inserted value. 5943if (
auto *
I = dyn_cast<Instruction>(FullV))
5944if (
L->contains(
I) && !
L->contains(BB))
5945 InsertedNonLCSSAInsts.insert(
I);
5948 Pair.first->second = FullV;
5951// If LSR splits critical edge and phi node has other pending 5952// fixup operands, we need to update those pending fixups. Otherwise 5953// formulae will not be implemented completely and some instructions 5954// will not be eliminated. 5955if (needUpdateFixups) {
5956for (LSRUse &LU :
Uses)
5957for (LSRFixup &
Fixup : LU.Fixups)
5958// If fixup is supposed to rewrite some operand in the phi 5959// that was just updated, it may be already moved to 5960// another phi node. Such fixup requires update. 5961if (
Fixup.UserInst == PN) {
5962// Check if the operand we try to replace still exists in the 5964bool foundInOriginalPHI =
false;
5966if (val ==
Fixup.OperandValToReplace) {
5967 foundInOriginalPHI =
true;
5971// If fixup operand found in original PHI - nothing to do. 5972if (foundInOriginalPHI)
5975// Otherwise it might be moved to another PHI and requires update. 5976// If fixup operand not found in any of the incoming blocks that 5977// means we have already rewritten it - nothing to do. 5983if (val ==
Fixup.OperandValToReplace)
5984Fixup.UserInst = NewPN;
5991/// Emit instructions for the leading candidate expression for this LSRUse (this 5992/// is called "expanding"), and update the UserInst to reference the newly expanded value. 5994void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
5997// First, find an insertion point that dominates UserInst. For PHI nodes, 5998// find the nearest block which dominates all the relevant uses. 5999if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
6000 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6002Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
6004// If this is reuse-by-noop-cast, insert the noop cast. 6005Type *OpTy = LF.OperandValToReplace->getType();
6006if (FullV->
getType() != OpTy) {
6009 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
6013// Update the user. ICmpZero is handled specially here (for now) because 6014// Expand may have updated one of the operands of the icmp already, and 6015// its new value may happen to be equal to LF.OperandValToReplace, in 6016// which case doing replaceUsesOfWith leads to replacing both operands 6017// with the same value. TODO: Reorganize this. 6018if (LU.Kind == LSRUse::ICmpZero)
6021 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
6024if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
6028// Try to hoist the IVInc to the loop header if all IVInc users are in 6029// the loop header. This helps the backend generate post-index loads/stores 6030// when the latch block is different from the loop header block. 6034if (LU.Kind != LSRUse::Address)
6037// For now this code does the conservative optimization and only works for 6038// the header block. Later we can hoist the IVInc to the block that 6039// post-dominates all users. 6041if (IVIncInsertPos->
getParent() == LHeader)
6044if (!
Fixup.OperandValToReplace ||
6046 Instruction *UI = cast<Instruction>(U);
6047 return UI->getParent() != LHeader;
6052Type *Ty =
I->getType();
6058/// Rewrite all the fixup locations with new values, following the chosen 6060void LSRInstance::ImplementSolution(
6062// Keep track of instructions we may have made dead, so that 6063// we can remove them after we are done working. 6066// Mark phi nodes that terminate chains so the expander tries to reuse them. 6067for (
const IVChain &Chain : IVChainVec) {
6068if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6072// Expand the new value definitions and update the users. 6073for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6074for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6077 ?
L->getHeader()->getTerminator()
6079Rewriter.setIVIncInsertPos(L, InsertPos);
6080 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6084auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6087for (
const IVChain &Chain : IVChainVec) {
6088 GenerateIVChain(Chain, DeadInsts);
6094 ScalarEvolutionIVs.push_back(
IV);
6096// Clean up after ourselves. This must be done before deleting any 6103// In our cost analysis above, we assume that each addrec consumes exactly 6104// one register, and arrange to have increments inserted just before the 6105// latch to maximize the chance this is true. However, if we reused 6106// existing IVs, we now need to move the increments to match our 6107// expectations. Otherwise, our cost modeling results in us having 6108// chosen a non-optimal result for the actual schedule. (And yes, this 6109// scheduling decision does impact later codegen.) 6110for (
PHINode &PN :
L->getHeader()->phis()) {
6112Value *Start =
nullptr, *Step =
nullptr;
6117case Instruction::Sub:
6119// sub is non-commutative - match handling elsewhere in LSR 6122case Instruction::Add:
6128if (!isa<Constant>(Step))
6129// If not a constant step, might increase register pressure 6130// (We assume constants have been canonicalized to RHS) 6133if (BO->
getParent() == IVIncInsertPos->getParent())
6134// Only bother moving across blocks. Isel can handle block local case. 6137// Can we legally schedule inc at the desired point? 6139 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6141 BO->
moveBefore(IVIncInsertPos->getIterator());
6152 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6155 :
TTI.getPreferredAddressingMode(
L, &SE)),
6157 BaselineCost(
L, SE,
TTI, AMK) {
6158// If LoopSimplify form is not available, stay out of trouble. 6159if (!
L->isLoopSimplifyForm())
6162// If there's no interesting work to be done, bail early. 6163if (IU.
empty())
return;
6165// If there's too much analysis to be done, bail early. We won't be able to 6166// model the problem anyway. 6167unsigned NumUsers = 0;
6171LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6175// Bail out if we have a PHI on an EHPad that gets a value from a 6176// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is 6177// no good place to stick any instructions. 6178if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
6179auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6180if (isa<FuncletPadInst>(FirstNonPHI) ||
6181 isa<CatchSwitchInst>(FirstNonPHI))
6183if (isa<CatchSwitchInst>(PredBB->getFirstNonPHIIt()))
6189L->getHeader()->printAsOperand(
dbgs(),
/*PrintType=*/false);
6192// Configure SCEVExpander already now, so the correct mode is used for 6193// isSafeToExpand() checks. 6194#if LLVM_ENABLE_ABI_BREAKING_CHECKS 6200// First, perform some low-level loop optimizations. 6202 OptimizeLoopTermCond();
6204// If loop preparation eliminates all interesting IV users, bail. 6205if (IU.empty())
return;
6207// Skip nested loops until we can model them better with formulae. 6208if (!
L->isInnermost()) {
6213// Start collecting data and preparing for the solver. 6214// If the number of registers is not the major cost, we cannot benefit from the 6215// current profitable chain optimization, which is based on the number of 6217// FIXME: add profitable chain optimization for other kinds of major cost, for 6218// example the number of instructions. 6221 CollectInterestingTypesAndFactors();
6222 CollectFixupsAndInitialFormulae();
6223 CollectLoopInvariantFixupsAndFormulae();
6229 print_uses(
dbgs()));
6231 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6233// Now use the reuse data to generate a bunch of interesting ways 6234// to formulate the values needed for the uses. 6235 GenerateAllReuseFormulae();
6237 FilterOutUndesirableDedicatedRegisters();
6238 NarrowSearchSpaceUsingHeuristics();
6243// Release memory that is no longer needed. 6248if (Solution.
empty())
6252// Formulae should be legal. 6253for (
const LSRUse &LU :
Uses) {
6254for (
const Formula &
F : LU.Formulae)
6256F) &&
"Illegal formula generated!");
6260// Now that we've decided what we want, make it so. 6261 ImplementSolution(Solution);
6264#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 6265void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const{
6266if (Factors.empty() &&
Types.empty())
return;
6268OS <<
"LSR has identified the following interesting factors and types: ";
6271for (int64_t Factor : Factors) {
6277for (
Type *Ty : Types) {
6280OS <<
'(' << *Ty <<
')';
6286OS <<
"LSR is examining the following fixup sites:\n";
6287for (
const LSRUse &LU :
Uses)
6288for (
const LSRFixup &LF : LU.Fixups) {
6296OS <<
"LSR is examining the following uses:\n";
6297for (
const LSRUse &LU :
Uses) {
6301for (
const Formula &
F : LU.Formulae) {
6310 print_factors_and_types(
OS);
6322classLoopStrengthReduce :
publicLoopPass {
6324staticcharID;
// Pass ID, replacement for typeid 6326 LoopStrengthReduce();
6333}
// end anonymous namespace 6335LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6339void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const{
6340// We split critical edges, so we change the CFG. However, we do update 6341// many analyses if they are around. 6353// Requiring LoopSimplify a second time here prevents IVUsers from running 6354// twice, since LoopSimplify was invalidated by running ScalarEvolution. 6364/// Enables more convenient iteration over a DWARF expression vector. 6374structSCEVDbgValueBuilder {
6375 SCEVDbgValueBuilder() =
default;
6376 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6378void clone(
const SCEVDbgValueBuilder &
Base) {
6379 LocationOps =
Base.LocationOps;
6384 LocationOps.clear();
6388 /// The DIExpression as we translate the SCEV. 6390 /// The location ops of the DIExpression. 6396 /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value 6397 /// in the set of values referenced by the expression. 6401unsigned ArgIndex = 0;
6402if (It != LocationOps.
end()) {
6403 ArgIndex = std::distance(LocationOps.
begin(), It);
6405 ArgIndex = LocationOps.
size();
6417if (
C->getAPInt().getSignificantBits() > 64)
6419 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6420 Expr.
push_back(
C->getAPInt().getSExtValue());
6424// Iterating the expression as DWARF ops is convenient when updating 6425// DWARF_OP_LLVM_args. 6427return ToDwarfOpIter(Expr);
6430 /// Several SCEV types are sequences of the same arithmetic operator applied 6431 /// to constants and values that may be extended or truncated. 6434assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6435"Expected arithmetic SCEV type");
6437unsigned EmitOperator = 0;
6441if (EmitOperator >= 1)
6442 pushOperator(DwarfOp);
6448// TODO: Identify and omit noop casts. 6455 IsSigned ? llvm::dwarf::DW_ATE_signed
6456 : llvm::dwarf::DW_ATE_unsigned};
6457for (
constauto &
Op : CastOps)
6462// TODO: MinMax - although these haven't been encountered in the test suite. 6465if (
constSCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6466Success &= pushConst(StartInt);
6468 }
elseif (
constSCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6471 pushLocation(
U->getValue());
6473 }
elseif (
constSCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6474Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6476 }
elseif (
constSCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6477Success &= pushSCEV(UDiv->getLHS());
6478Success &= pushSCEV(UDiv->getRHS());
6479 pushOperator(llvm::dwarf::DW_OP_div);
6481 }
elseif (
constSCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6482// Assert if a new and unknown SCEVCastExpr type is encountered. 6483assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6484 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6485"Unexpected cast type in SCEV.");
6486Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6488 }
elseif (
constSCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6489Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6491 }
elseif (isa<SCEVAddRecExpr>(S)) {
6492// Nested SCEVAddRecExpr are generated by nested loops and are currently 6502 /// Return true if the combination of arithmetic operator and underlying 6503 /// SCEV constant value is an identity function. 6506if (
C->getAPInt().getSignificantBits() > 64)
6508 int64_t
I =
C->getAPInt().getSExtValue();
6510case llvm::dwarf::DW_OP_plus:
6511case llvm::dwarf::DW_OP_minus:
6513case llvm::dwarf::DW_OP_mul:
6514case llvm::dwarf::DW_OP_div:
6521 /// Convert a SCEV of a value to a DIExpression that is pushed onto the 6522 /// builder's expression stack. The stack should already contain an 6523 /// expression for the iteration count, so that it can be multiplied by 6524 /// the stride and added to the start. 6525 /// Components of the expression are omitted if they are an identity function. 6526 /// Chain (non-affine) SCEVs are not supported. 6529// TODO: Is this check needed? 6530if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6536// Skip pushing arithmetic noops. 6537if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6538if (!pushSCEV(Stride))
6540 pushOperator(llvm::dwarf::DW_OP_mul);
6542if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6543if (!pushSCEV(Start))
6545 pushOperator(llvm::dwarf::DW_OP_plus);
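// Worked example for SCEVToValueExpr above (illustrative only): for a value
// whose SCEV is {%start,+,4} and with the iteration-count expression already
// on the stack, the builder emits roughly
//   DW_OP_consts 4, DW_OP_mul, DW_OP_LLVM_arg <%start>, DW_OP_plus
// i.e. it recovers start + 4 * itercount, with the multiply or add omitted
// when the stride or start is an identity value (1 or 0 respectively).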
6550 /// Create an expression that is an offset from a value (usually the IV). 6551void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6552 pushLocation(OffsetValue);
6555dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: " 6556 << std::to_string(
Offset) <<
"\n");
6559 /// Combine a translation of the SCEV and the IV to create an expression that 6560 /// recovers a location's value. 6561 /// returns true if an expression was created. 6562bool createIterCountExpr(
constSCEV *S,
6563const SCEVDbgValueBuilder &IterationCount,
6565// SCEVs for SSA values are most frequently of the form 6566// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). 6567// This is because %a is a PHI node that is not the IV. However, these 6568// SCEVs have not been observed to result in debuginfo-lossy optimisations, 6569// so it's not expected this point will be reached. 6570if (!isa<SCEVAddRecExpr>(S))
6573LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6576constauto *Rec = cast<SCEVAddRecExpr>(S);
6577if (!Rec->isAffine())
6583// Initialise a new builder with the iteration count expression. In 6584// combination with the value's SCEV this enables recovery. 6585 clone(IterationCount);
6586if (!SCEVToValueExpr(*Rec, SE))
6592 /// Convert a SCEV of a value to a DIExpression that is pushed onto the 6593 /// builder's expression stack. The stack should already contain an 6594 /// expression for the iteration count, so that it can be multiplied by 6595 /// the stride and added to the start. 6596 /// Components of the expression are omitted if they are an identity function. 6600if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6601LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: " 6608// Skip pushing arithmetic noops. 6609if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6610if (!pushSCEV(Start))
6612 pushOperator(llvm::dwarf::DW_OP_minus);
6614if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6615if (!pushSCEV(Stride))
6617 pushOperator(llvm::dwarf::DW_OP_div);
6622// Append the current expression and locations to a location list and an 6623// expression list. Modify the DW_OP_LLVM_arg indexes to account for 6624// the locations already present in the destination list. 6628"Expected the locations vector to contain the IV");
6629// The DWARF_OP_LLVM_arg arguments of the expression being appended must be 6630// modified to account for the locations already in the destination vector. 6631// All builders contain the IV as the first location op. 6633"Expected the location ops to contain the IV.");
6634// DestIndexMap[n] contains the index in DestLocations for the nth 6635// location in this SCEVDbgValueBuilder. 6637for (
constauto &
Op : LocationOps) {
6638auto It =
find(DestLocations,
Op);
6639if (It != DestLocations.
end()) {
6640// Location already exists in DestLocations, reuse existing ArgIndex. 6641 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6644// Location is not in DestLocations, add it. 6649for (
constauto &
Op : expr_ops()) {
6651Op.appendToVector(DestExpr);
6656// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV, 6657// DestIndexMap[n] contains its new index in DestLocations. 6658uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6664/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs 6665/// and DIExpression. 6666structDVIRecoveryRec {
6669 HadLocationArgList(
false) {}
6671 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6675bool HadLocationArgList;
6681for (
auto &RE : RecoveryExprs)
6683 RecoveryExprs.
clear();
6686 ~DVIRecoveryRec() { clear(); }
6690/// Returns the total number of DW_OP_llvm_arg operands in the expression. 6691/// This helps in determining if a DIArglist is necessary or can be omitted from 6694auto expr_ops = ToDwarfOpIter(Expr);
6696for (
autoOp : expr_ops)
6702/// Overwrites DVI with the location and Ops as the DIExpression. This will 6703/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands, 6704/// because a DIArglist is not created for the first argument of the dbg.value. 6705template <
typename T>
6709"contain any DW_OP_llvm_arg operands.");
6711 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6715/// Overwrite DVI with locations placed into a DIArglist. 6716template <
typename T>
6721"Expected expression that references DIArglist locations using " 6722"DW_OP_llvm_arg operands.");
6724for (
Value *V : Locations)
6728 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6731/// Write the new expression and new location ops for the dbg.value. If possible 6732/// reduce the size of the dbg.value intrinsic by omitting DIArglist. This 6733/// can be omitted if: 6734/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg. 6735/// 2. The DW_OP_LLVM_arg is the first operand in the expression. 6739auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6741if (NumLLVMArgs == 0) {
6742// Location assumed to be on the stack. 6745// There is only a single DW_OP_llvm_arg at the start of the expression, 6746// so it can be omitted along with DIArglist. 6748"Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6752// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary. 6756// If the DIExpression was previously empty then add the stack terminator. 6757// Non-empty expressions have only had elements inserted into them and so 6758// the terminator should already be present e.g. stack_value or fragment. 6760if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6763 DbgVal->setExpression(SalvageExpr);
6766if (isa<DbgValueInst *>(DVIRec.DbgRef))
6767 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6769 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
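// Worked example for the update above (illustrative only): a salvaged
// dbg.value with a single location and the expression
//   (DW_OP_LLVM_arg 0, DW_OP_plus_uconst 8, DW_OP_stack_value)
// can drop the DIArgList and keep the location directly, leaving
//   (DW_OP_plus_uconst 8, DW_OP_stack_value),
// whereas an expression that refers to two locations via DW_OP_LLVM_arg 0 and
// DW_OP_LLVM_arg 1 must keep the DIArgList form.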
6772/// Cached location ops may be erased during LSR, in which case a poison is 6773/// required when restoring from the cache. The type of that location is no 6774/// longer available, so just use int8. The poison will be replaced by one or 6775/// more locations later when a SCEVDbgValueBuilder selects alternative 6776/// locations to use for the salvage. 6781/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values. 6783auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6784LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n" 6785 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6786assert(DVIRec.Expr &&
"Expected an expression");
6787 DbgVal->setExpression(DVIRec.Expr);
6789// Even a single location-op may be inside a DIArgList and referenced with 6790// DW_OP_LLVM_arg, which is valid only with a DIArgList. 6791if (!DVIRec.HadLocationArgList) {
6792assert(DVIRec.LocationOps.size() == 1 &&
6793"Unexpected number of location ops.");
6794// LSR's unsuccessful salvage attempt may have added DIArgList, which in 6795// this case was not present before, so force the location back to a 6796// single uncontained Value. 6802for (
WeakVH VH : DVIRec.LocationOps) {
6807 DbgVal->setRawLocation(
6812if (isa<DbgValueInst *>(DVIRec.DbgRef))
6813 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6815 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6820constSCEV *SCEVInductionVar,
6821 SCEVDbgValueBuilder IterCountExpr) {
6823if (isa<DbgValueInst *>(DVIRec.DbgRef)
6824 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6825 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6828// LSR may have caused several changes to the dbg.value in the failed salvage 6829// attempt. So restore the DIExpression, the location ops and also the 6830// location ops format, which is always DIArglist for multiple ops, but only 6831// sometimes for a single op. 6834// LocationOpIndexMap[i] will store the post-LSR location index of 6835// the non-optimised out location at pre-LSR index i. 6837 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6839 NewLocationOps.
push_back(LSRInductionVar);
6841for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6842WeakVH VH = DVIRec.LocationOps[i];
6843// Place the locations not optimised out in the list first, avoiding 6844// inserts later. The map is used to update the DIExpression's 6845// DW_OP_LLVM_arg arguments as the expression is updated. 6846if (VH && !isa<UndefValue>(VH)) {
6848 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6850 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6854// It's possible that a value referred to in the SCEV may have been 6855// optimised out by LSR. 6858LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6859 <<
" refers to a location that is now undef or erased. " 6860"Salvage abandoned.\n");
6864LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6865 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6867 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6868 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6870// Create an offset-based salvage expression if possible, as it requires 6871// less DWARF ops than an iteration count-based expression. 6872if (std::optional<APInt>
Offset =
6874if (
Offset->getSignificantBits() <= 64)
6875 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6878 }
elseif (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6883// Merge the DbgValueBuilder generated expressions and the original 6884// DIExpression, place the result into an new vector. 6886if (DVIRec.Expr->getNumElements() == 0) {
6887assert(DVIRec.RecoveryExprs.size() == 1 &&
6888"Expected only a single recovery expression for an empty " 6890assert(DVIRec.RecoveryExprs[0] &&
6891"Expected a SCEVDbgSalvageBuilder for location 0");
6892 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6893B->appendToVectors(
NewExpr, NewLocationOps);
6895for (
constauto &
Op : DVIRec.Expr->expr_ops()) {
6896// Most Ops needn't be updated. 6903 SCEVDbgValueBuilder *DbgBuilder =
6904 DVIRec.RecoveryExprs[LocationArgIndex].get();
6905// The location doesn't have a SCEVDbgValueBuilder, so LSR did not 6906// optimise it away. So just translate the argument to the updated 6910assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6911"Expected a positive index for the location-op position.");
6912NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6915// The location has a recovery expression. 6916 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6920if (isa<DbgValueInst *>(DVIRec.DbgRef))
6922 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6925 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6929/// Obtain an expression for the iteration count, then attempt to salvage the 6930/// dbg.value intrinsics. 6933SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6934if (DVIToUpdate.empty())
6938assert(SCEVInductionVar &&
6939"Anticipated a SCEV for the post-LSR induction variable");
6942 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6943if (!IVAddRec->isAffine())
6946// Prevent translation using excessive resources. 6950// The iteration count is required to recover location values. 6951 SCEVDbgValueBuilder IterCountExpr;
6952 IterCountExpr.pushLocation(LSRInductionVar);
6953if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6956LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6959for (
auto &DVIRec : DVIToUpdate) {
6960SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6966/// Identify and cache salvageable DVI locations and expressions along with the 6967/// corresponding SCEV(s). Also ensure that the DVI is not deleted between 6968/// cacheing and salvaging. 6971SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6973for (
constauto &
B : L->getBlocks()) {
6975auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6976// Ensure that if any location op is undef that the dbg.value is not 6978if (DbgVal->isKillLocation())
6981// Check that the location op SCEVs are suitable for translation to 6983constauto &HasTranslatableLocationOps =
6984 [&](
constauto *DbgValToTranslate) ->
bool {
6985for (
constauto LocOp : DbgValToTranslate->location_ops()) {
6999if (!HasTranslatableLocationOps(DbgVal))
7002 std::unique_ptr<DVIRecoveryRec> NewRec =
7003 std::make_unique<DVIRecoveryRec>(DbgVal);
7004// Each location Op may need a SCEVDbgValueBuilder in order to recover 7005// it. Pre-allocating a vector will enable quick lookups of the builder 7006// later during the salvage. 7007 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
7008for (
constauto LocOp : DbgVal->location_ops()) {
7009 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7010 NewRec->LocationOps.push_back(LocOp);
7011 NewRec->HadLocationArgList = DbgVal->hasArgList();
7013 SalvageableDVISCEVs.push_back(std::move(NewRec));
7017if (DVR.isDbgValue() || DVR.isDbgAssign())
7018 ProcessDbgValue(&DVR);
7020auto DVI = dyn_cast<DbgValueInst>(&
I);
7023if (ProcessDbgValue(DVI))
7024 DVIHandles.insert(DVI);
7029/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback 7030/// any PHI from the loop header is usable, but may have less chance of 7031/// surviving subsequent transforms. 7033const LSRInstance &LSR) {
7035auto IsSuitableIV = [&](
PHINode *
P) {
7043// For now, just pick the first IV that was generated and inserted by 7044// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away 7045// by subsequent transforms. 7046for (
constWeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7050// There should only be PHI node IVs. 7057for (
PHINode &
P : L.getHeader()->phis()) {
7058if (IsSuitableIV(&
P))
7070// Debug preservation - before we start removing anything identify which DVI 7071// meet the salvageable criteria and store their DIExpression and SCEVs. 7077 std::unique_ptr<MemorySSAUpdater> MSSAU;
7079 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7081// Run the main LSR transformation. 7082const LSRInstance &Reducer =
7083 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7084 Changed |= Reducer.getChanged();
7086// Remove any extra phis created by processing inner loops. 7092#if LLVM_ENABLE_ABI_BREAKING_CHECKS 7095unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7104// LSR may at times remove all uses of an induction variable from a loop. 7105// The only remaining use is the PHI in the exit block. 7106// When this is the case, if the exit value of the IV can be calculated using 7107// SCEV, we can replace the exit block PHI with the final value of the IV and 7108// skip the updates in each loop iteration. 7109if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRecSCEVs
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
// ... (analysis-pass dependency registrations elided)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }