LLVM 20.0.0git
LoopStrengthReduce.cpp
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops
// that have as one or more of their components the loop induction variable;
// it rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
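// For example (an illustrative sketch, not output of this pass), a loop
// indexing an i32 array:
//
//   for (i = 0; i < n; ++i)
//     sum += a[i];
//
// naively recomputes the address a + 4*i on each iteration. With scaled-index
// addressing the multiply can be folded into the memory operand, or the
// address can be strength-reduced into a pointer that is simply incremented
// by 4 each iteration; which form wins depends on the target's legal
// addressing modes.
//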
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
// %i = phi [ 0, %entry ], [ %i.next, %latch ]
// ...
// %i.next = add %i, 1
// %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>;
// however, it's useful to think about these as the same register, with some
// uses using the value of the register before the add and some using it after.
// In this example, the icmp is a post-increment user, since it uses %i.next,
// which is the value of the induction variable after the increment. The other
// common case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
// multiple base registers, or as the increment expression in an addrec),
// we may not actually need both reg and (-1 * reg) in registers; the
// negation can be implemented by using a sub instead of an add. The
// lack of support for taking this into consideration when making
// register pressure decisions is partly worked around by the "Special"
// use kind.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;

// Cleanup congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
    "enable-lsr-phielim", cl::Hidden, cl::init(true),
    cl::desc("Enable LSR phi elimination"));

// The flag adds instruction count to solutions cost comparison.
static cl::opt<bool> InsnsCost(
    "lsr-insns-cost", cl::Hidden, cl::init(true),
    cl::desc("Add instruction count to a LSR cost model"));

// Flag to choose how to narrow complex lsr solution
static cl::opt<bool> LSRExpNarrow(
    "lsr-exp-narrow", cl::Hidden, cl::init(false),
    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

// Flag to narrow search space by filtering non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
    "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
    cl::desc("A flag that overrides the target's preferred addressing mode."),
    cl::values(clEnumValN(TTI::AMK_None,
                          "none",
                          "Don't prefer any addressing mode"),
               clEnumValN(TTI::AMK_PreIndexed,
                          "preindexed",
                          "Prefer pre-indexed addressing mode"),
               clEnumValN(TTI::AMK_PostIndexed,
                          "postindexed",
                          "Prefer post-indexed addressing mode")));

static cl::opt<unsigned> ComplexityLimit(
    "lsr-complexity-limit", cl::Hidden,
    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

static cl::opt<unsigned> SetupCostDepthLimit(
    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
    cl::desc("The limit on recursion depth for LSRs setup cost"));

static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
    "lsr-drop-solution", cl::Hidden,
    cl::desc("Attempt to drop solution if it is less profitable"));

static cl::opt<bool> EnableVScaleImmediates(
    "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),
    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

static cl::opt<bool> DropScaledForVScale(
    "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
    "stress-ivchain", cl::Hidden, cl::init(false),
    cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif
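
// Usage sketch (an assumption about the standard opt driver, not part of this
// file): the cl::opt flags above are ordinary command-line options, e.g.:
//
//   opt -passes='loop(loop-reduce)' -lsr-insns-cost=false -S input.ll
//
// The exact pass-pipeline spelling may vary between releases.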

namespace {

struct MemAccessTy {
  /// Used in situations where the accessed memory type is unknown.
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};

/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};

// An offset from an address that is either scalable or fixed. Used for
// per-target optimizations of addressing modes.
class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

public:
  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  // Arithmetic 'operators' that cast to unsigned types first.
  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  // Scale the quantity by a constant without caring about runtime scalability.
  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }

  // Helpers for generating SCEVs with vscale terms where needed.
  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *S = SE.getConstant(Ty, Quantity);
    if (Scalable)
      S = SE.getMulExpr(S, SE.getVScale(S->getType()));
    return S;
  }

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
    if (Scalable)
      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
    return NegS;
  }

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
    if (Scalable)
      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
    return SU;
  }
};
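
// For example, Immediate::getFixed(16) models a plain byte offset of 16,
// while Immediate::getScalable(16) models 16 * vscale, as arises when
// indexing into scalable vectors. addUnsigned requires compatible kinds,
// and a zero offset is compatible with either kind, so
// Immediate::getZero().addUnsigned(Immediate::getScalable(16)) yields a
// scalable 16.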

// This is needed for the Compare type of std::map when Immediate is used
// as a key. We don't need it to be fully correct against any value of vscale,
// just to make sure that vscale-related terms in the map are considered against
// each other rather than being mixed up and potentially missing opportunities.
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};
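
// Under this ordering, for example, all fixed offsets sort before all
// scalable ones: getFixed(8) compares less than getScalable(4), and ties
// within a kind fall back to comparing the known minimum values.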

// This would be nicer if we could be generic instead of directly using size_t,
// but there doesn't seem to be a type trait for is_orderable or
// is_lessthan_comparable or similar.
struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};
} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

/// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;

public:
  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end() { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const { return RegSequence.end(); }
};

} // end anonymous namespace

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Update RegUses. The data structure is not optimized for this purpose;
  // we must iterate through it and update each of the bit vectors.
  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}
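
// For example, with four uses {0,1,2,3}, swapAndDropUse(1, 3) gives each
// register's bit for use 1 the old value of its bit for use 3 and then
// truncates the bit vectors to 3 entries, mirroring a swap-and-pop of the
// use list.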

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}

namespace {

/// This class holds information that describes a formula for computing a
/// value that satisfies a use. It may include broken-out immediates and
/// scaled registers.
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV = nullptr;

  /// Base offset for complex addressing.
  Immediate BaseOffset = Immediate::getZero();

  /// Whether any complex addressing has a base register.
  bool HasBaseReg = false;

  /// The scale of any complex addressing.
  int64_t Scale = 0;

  /// The list of "base" registers for this use. When this is non-empty, the
  /// canonical representation of a formula is
  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing the recurrent expr related with the current loop in
  ///    the formula should be put in the ScaledReg.
  /// #1 enforces that the scaled register is always used when at least two
  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
  /// #2 enforces that 1 * reg is reg.
  /// #3 ensures invariant regs with respect to current loop can be combined
  /// together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
  /// However, every formula inserted into the LSRInstance must be in canonical
  /// form.
  SmallVector<const SCEV *, 4> BaseRegs;

  /// The 'scaled' register for this use. This should be non-null when Scale is
  /// not zero.
  const SCEV *ScaledReg = nullptr;

  /// An additional constant offset which is added near the use. This requires
  /// a temporary register, but the offset itself can live in an add immediate
  /// field rather than a register.
  Immediate UnfoldedOffset = Immediate::getZero();

  Formula() = default;

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  bool isCanonical(const Loop &L) const;

  void canonicalize(const Loop &L);

  bool unscale();

  bool hasZeroEnd() const;

  size_t getNumRegs() const;
  Type *getType() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
      DoInitialMatch(S, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero() && AR->isAffine()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
          SE.getEffectiveSCEVType(NewMul->getType())));
      for (const SCEV *S : MyGood)
        Good.push_back(SE.getMulExpr(NegOne, S));
      for (const SCEV *S : MyBad)
        Bad.push_back(SE.getMulExpr(NegOne, S));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// Incorporate loop-variant parts of S into this Formula, attempting to keep
/// all loop-invariant and loop-computable values in a single base register.
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  canonicalize(*L);
}

static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
  return SCEVExprContains(S, [&L](const SCEV *S) {
    return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
  });
}

/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");

  if (!ScaledReg)
    return BaseRegs.size() <= 1;

  if (Scale != 1)
    return true;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  if (containsAddRecDependentOnLoop(ScaledReg, L))
    return true;

  // If ScaledReg is not a recurrent expr, or it is but its loop is not the
  // current loop, while BaseRegs contains a recurrent expr reg related with
  // the current loop, we want to swap the reg in BaseRegs with ScaledReg.
  return none_of(BaseRegs, [&L](const SCEV *S) {
    return containsAddRecDependentOnLoop(S, L);
  });
}

/// Helper method to morph a formula into its canonical representation.
/// \see Formula::BaseRegs.
/// Every formula having more than one base register must use the ScaledReg
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
void Formula::canonicalize(const Loop &L) {
  if (isCanonical(L))
    return;

  if (BaseRegs.empty()) {
    // No base reg? Use scale reg with scale = 1 as such.
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);
    Scale = 0;
    ScaledReg = nullptr;
    return;
  }

  // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
  if (!ScaledReg) {
    ScaledReg = BaseRegs.pop_back_val();
    Scale = 1;
  }

  // If ScaledReg is an invariant with respect to L, find the reg from
  // BaseRegs containing the recurrent expr related with Loop L. Swap the
  // reg with ScaledReg.
  if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
    auto I = find_if(BaseRegs, [&L](const SCEV *S) {
      return containsAddRecDependentOnLoop(S, L);
    });
    if (I != BaseRegs.end())
      std::swap(ScaledReg, *I);
  }
  assert(isCanonical(L) && "Failed to canonicalize?");
}
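
// For example, a non-canonical formula with BaseRegs = {reg1, {0,+,4}<%L>}
// and no ScaledReg is canonicalized by moving one register into ScaledReg
// with Scale = 1 and then swapping so that the loop-dependent addrec ends up
// scaled: BaseRegs = {reg1}, ScaledReg = {0,+,4}<%L>, Scale = 1.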

/// Get rid of the scale in the formula.
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
bool Formula::unscale() {
  if (Scale != 1)
    return false;
  Scale = 0;
  BaseRegs.push_back(ScaledReg);
  ScaledReg = nullptr;
  return true;
}

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;
  return true;
}

/// Return the total number of register operands used by this formula. This does
/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// Return the type of this formula, if it has one, or null otherwise. This type
/// is meaningless except for the bit size.
Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         nullptr;
}

/// Delete the given base reg from the BaseRegs list.
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg || is_contained(BaseRegs, S);
}

/// Test whether this formula uses registers which are used by uses other than
/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  }
  if (BaseOffset.isNonZero()) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

LLVM_DUMP_METHOD void Formula::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Return true if the given addrec can be sign-extended without changing its
/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  Type *WideTy =
      IntegerType::get(SE.getContext(),
                       SE.getTypeSizeInBits(AR->getType()) + 1);
  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}

/// Return true if the given add can be sign-extended without changing its
/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  Type *WideTy =
      IntegerType::get(SE.getContext(),
                       SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}

/// Return true if the given mul can be sign-extended without changing its
/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  Type *WideTy =
      IntegerType::get(SE.getContext(),
                       SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}

/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())
        return nullptr;
      return SE.getMulExpr(LHS, RC);
    }
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    if (LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
      // FlagNW is independent of the start value, step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op) return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      // Handle special case C1*X*Y /s C2*X*Y.
      if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
        if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
          const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
          const SCEVConstant *RC =
              dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
          if (LC && RC) {
            SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
            SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
            if (LOps == ROps)
              return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
          }
        }
      }

      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Otherwise we don't know.
  return nullptr;
}
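
// For example (assuming the addrec is known not to change value when widened),
// getExactSDiv({8,+,4}<%L>, 4, SE) distributes over the affine addrec and
// yields {2,+,1}<%L>, whereas getExactSDiv(6, 4, SE) returns null because the
// remainder is nonzero.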

/// If S involves the addition of a constant integer value, return that integer
/// value, and mutate S to point to a new SCEV with that value excluded.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getAPInt().getSignificantBits() <= 64) {
      S = SE.getConstant(C->getType(), 0);
      return Immediate::getFixed(C->getValue()->getSExtValue());
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->operands());
    Immediate Result = ExtractImmediate(NewOps.front(), SE);
    if (Result.isNonZero())
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->operands());
    Immediate Result = ExtractImmediate(NewOps.front(), SE);
    if (Result.isNonZero())
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    if (EnableVScaleImmediates && M->getNumOperands() == 2) {
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
        if (isa<SCEVVScale>(M->getOperand(1))) {
          S = SE.getConstant(M->getType(), 0);
          return Immediate::getScalable(C->getValue()->getSExtValue());
        }
    }
  }
  return Immediate::getZero();
}
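
// For example, given S = (4 + %a), ExtractImmediate returns the fixed
// immediate 4 and mutates S to %a; given S = (8 * vscale) (with
// lsr-enable-vscale-immediates), it returns the scalable immediate 8 and
// mutates S to 0.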

/// If S involves the addition of a GlobalValue address, return that symbol, and
/// mutate S to point to a new SCEV with that value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
      S = SE.getConstant(GV->getType(), 0);
      return GV;
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->operands());
    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
    if (Result)
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->operands());
    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
    if (Result)
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  }
  return nullptr;
}

/// Returns true if the specified instruction is using the specified value as an
/// address.
static bool isAddressUse(const TargetTransformInfo &TTI,
                         Instruction *Inst, Value *OperandVal) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
      isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
        isAddress = true;
      break;
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
        isAddress = true;
      break;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
        isAddress = true;
      break;
    default: {
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
        if (IntrInfo.PtrVal == OperandVal)
          isAddress = true;
      }
    }
    }
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
      isAddress = true;
  } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    if (CmpX->getPointerOperand() == OperandVal)
      isAddress = true;
  }
  return isAddress;
}

/// Return the type of the memory being accessed.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
                                 Instruction *Inst, Value *OperandVal) {
  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());

  // First get the type of memory being accessed.
  if (Type *Ty = Inst->getAccessType())
    AccessTy.MemTy = Ty;

  // Then get the pointer address space.
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
  } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
      break;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
      break;
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      break;
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
      break;
    default: {
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
        AccessTy.AddrSpace
          = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
      }

      break;
    }
    }
  }

  return AccessTy;
}

/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
    if (SE.isSCEVable(PN.getType()) &&
        (SE.getEffectiveSCEVType(PN.getType()) ==
         SE.getEffectiveSCEVType(AR->getType())) &&
        SE.getSCEV(&PN) == AR)
      return true;
  }
  return false;
}

/// Check if expanding this expression is likely to incur significant cost. This
/// is tricky because SCEV doesn't track which expressions are actually computed
/// by the current IR.
///
/// We currently allow expansion of IV increments that involve adds,
/// multiplication by constants, and AddRecs from existing phis.
///
/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
/// obvious multiple of the UDivExpr.
static bool isHighCostExpansion(const SCEV *S,
                                SmallPtrSetImpl<const SCEV*> &Processed,
                                ScalarEvolution &SE) {
  // Zero/One operand expressions
  switch (S->getSCEVType()) {
  case scUnknown:
  case scConstant:
  case scVScale:
    return false;
  case scTruncate:
    return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
                               Processed, SE);
  case scZeroExtend:
    return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
                               Processed, SE);
  case scSignExtend:
    return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
                               Processed, SE);
  default:
    break;
  }

  if (!Processed.insert(S).second)
    return false;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands()) {
      if (isHighCostExpansion(S, Processed, SE))
        return true;
    }
    return false;
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() == 2) {
      // Multiplication by a constant is ok
      if (isa<SCEVConstant>(Mul->getOperand(0)))
        return isHighCostExpansion(Mul->getOperand(1), Processed, SE);

      // If we have the value of one operand, check if an existing
      // multiplication already generates this expression.
      if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
        Value *UVal = U->getValue();
        for (User *UR : UVal->users()) {
          // If U is a constant, it may be used by a ConstantExpr.
          Instruction *UI = dyn_cast<Instruction>(UR);
          if (UI && UI->getOpcode() == Instruction::Mul &&
              SE.isSCEVable(UI->getType())) {
            return SE.getSCEV(UI) == Mul;
          }
        }
      }
    }
  }

  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (isExistingPhi(AR, SE))
      return false;
  }

  // For now, consider any other type of expression (div/mul/min/max) high cost.
  return true;
}
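
// For example, a multiply by a constant such as (4 * %x) is not considered
// high cost (it recurses into %x), and an addrec that already exists as a
// loop phi is free to reuse, but any division is currently always treated as
// a high-cost expansion.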

namespace {

class LSRUse;

} // end anonymous namespace

/// Check if the addressing mode defined by \p F is completely
/// folded in \p LU at isel time.
/// This includes address-mode folding and special icmp tricks.
/// This function returns true if \p LU can accommodate what \p F
/// defines and up to 1 base + 1 scaled + offset.
/// In other words, if \p F has several base registers, this function may
/// still return true. Therefore, users still need to account for
/// additional base registers and/or unfolded offsets to derive an
/// accurate cost model.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 const LSRUse &LU, const Formula &F);

// Get the cost of the scaling factor used in F for LU.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
                                            const LSRUse &LU, const Formula &F,
                                            const Loop &L);

namespace {

/// This class is used to measure and compare candidate formulae.
class Cost {
  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;
  TTI::AddressingModeKind AMK = TTI::AMK_None;

public:
  Cost() = delete;
  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
       TTI::AddressingModeKind AMK) :
    L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
    C.Insns = 0;
    C.NumRegs = 0;
    C.AddRecCost = 0;
    C.NumIVMuls = 0;
    C.NumBaseAdds = 0;
    C.ImmCost = 0;
    C.SetupCost = 0;
    C.ScaleCost = 0;
  }

  bool isLess(const Cost &Other) const;

  void Lose();

#ifndef NDEBUG
  // Once any of the metrics loses, they must all remain losers.
  bool isValid() {
    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
  }
#endif

  bool isLoser() {
    assert(isValid() && "invalid cost");
    return C.NumRegs == ~0u;
  }

  void RateFormula(const Formula &F,
                   SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs,
                   const LSRUse &LU,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
};

/// An operand value in an instruction which is to be replaced with some
/// equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
  /// The instruction which will be updated.
  Instruction *UserInst = nullptr;

  /// The operand of the instruction which will be replaced. The operand may be
  /// used more than once; every instance will be replaced.
  Value *OperandValToReplace = nullptr;

  /// If this user is to use the post-incremented value of an induction
  /// variable, this set is non-empty and holds the loops associated with the
  /// induction variable.
  PostIncLoopSet PostIncLoops;

  /// A constant offset to be added to the LSRUse expression. This allows
  /// multiple fixups to share the same LSRUse with different offsets, for
  /// example in an unrolled loop.
  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
/// SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
  static SmallVector<const SCEV *, 4> getEmptyKey() {
    SmallVector<const SCEV *, 4> V;
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    return V;
  }

  static SmallVector<const SCEV *, 4> getTombstoneKey() {
    SmallVector<const SCEV *, 4> V;
    V.push_back(reinterpret_cast<const SCEV *>(-2));
    return V;
  }

  static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
    return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
  }

  static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
                      const SmallVector<const SCEV *, 4> &RHS) {
    return LHS == RHS;
  }
};

/// This class holds the state that LSR keeps for each use in IVUsers, as well
/// as uses invented by LSR itself. It includes information about what kinds of
/// things can be folded into the user, information about the user itself, and
/// information about how the use may be satisfied. TODO: Represent multiple
/// users of the same expression in common?
class LSRUse {
  DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;

public:
  /// An enum for a kind of use, indicating what types of scaled and immediate
  /// operands it might support.
  enum KindType {
    Basic,   ///< A normal use, with no folding.
    Special, ///< A special case of basic, allowing -1 scales.
    Address, ///< An address use; folding according to TargetLowering
    ICmpZero ///< An equality icmp with both operands folded into one.
    // TODO: Add a generic icmp too?
  };

  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

  KindType Kind;
  MemAccessTy AccessTy;

  /// The list of operands which are to be replaced.
  SmallVector<LSRFixup, 8> Fixups;

  /// Keep track of the min and max offsets of the fixups.
  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  /// This records whether all of the fixups using this LSRUse are outside of
  /// the loop, in which case some special-case heuristics may be used.
  bool AllFixupsOutsideLoop = true;

  /// RigidFormula is set to true to guarantee that this use will be associated
  /// with a single formula--the one that initially matched. Some SCEV
  /// expressions cannot be expanded. This allows LSR to consider the registers
  /// used by those expressions without the need to expand them later after
  /// changing the formula.
  bool RigidFormula = false;

  /// This records the widest use type for any fixup using this
  /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
  /// fixup widths to be equivalent, because the narrower one may be relying on
  /// the implicit truncation to truncate away bogus bits.
  Type *WidestFixupType = nullptr;

  /// A list of ways to build a value that can satisfy this user. After the
  /// list is populated, one of these is selected heuristically and used to
  /// formulate a replacement for OperandValToReplace in UserInst.
  SmallVector<Formula, 12> Formulae;

  /// The set of register candidates used by all formulae in this LSRUse.
  SmallPtrSet<const SCEV *, 4> Regs;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());
    return Fixups.back();
  }

  void pushFixup(LSRFixup &f) {
    Fixups.push_back(f);
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;
  }

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup = nullptr);

static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
    return 1;
  if (Depth == 0)
    return 0;
  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
    return getSetupCost(S->getStart(), Depth - 1);
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
    return getSetupCost(S->getOperand(), Depth - 1);
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
    return std::accumulate(S->operands().begin(), S->operands().end(), 0,
                           [&](unsigned i, const SCEV *Reg) {
                             return i + getSetupCost(Reg, Depth - 1);
                           });
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
    return getSetupCost(S->getLHS(), Depth - 1) +
           getSetupCost(S->getRHS(), Depth - 1);
  return 0;
}
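
// For example, a register like (%a + %b + %c) has setup cost 3 (one per
// SCEVUnknown leaf), an addrec is charged only for its start value, and
// recursion beyond lsr-setupcost-depth-limit contributes nothing.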

/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
                        SmallPtrSetImpl<const SCEV *> &Regs) {
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
    // If this is an addrec for another loop, it should be an invariant
    // with respect to L since L is the innermost loop (at least
    // for now LSR only handles innermost loops).
    if (AR->getLoop() != L) {
      // If the AddRec exists, consider its register free and leave it alone.
      if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
        return;

      // It is bad to allow LSR for current loop to add induction variables
      // for its sibling loops.
      if (!AR->getLoop()->contains(L)) {
        Lose();
        return;
      }

      // Otherwise, it will be an invariant with respect to Loop L.
      ++C.NumRegs;
      return;
    }

    unsigned LoopCost = 1;
    if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
        TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {

      // If the step size matches the base offset, we could use pre-indexed
      // addressing.
      if (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed()) {
        if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
          if (Step->getAPInt() == F.BaseOffset.getFixedValue())
            LoopCost = 0;
      } else if (AMK == TTI::AMK_PostIndexed) {
        const SCEV *LoopStep = AR->getStepRecurrence(*SE);
        if (isa<SCEVConstant>(LoopStep)) {
          const SCEV *LoopStart = AR->getStart();
          if (!isa<SCEVConstant>(LoopStart) &&
              SE->isLoopInvariant(LoopStart, L))
            LoopCost = 0;
        }
      }
    }
    C.AddRecCost += LoopCost;

    // Add the step value register, if it needs one.
    // TODO: The non-affine case isn't precisely modeled here.
    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs);
        if (isLoser())
          return;
      }
    }
  }
  ++C.NumRegs;

  // Rough heuristic; favor registers which don't require extra setup
  // instructions in the preheader.
  C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
  // Ensure we don't, even with the recursion limit, produce invalid costs.
  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
                 SE->hasComputableLoopEvolution(Reg, L);
}
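
// For example, on a target with legal post-indexed memory operations, an
// affine addrec with a constant step and a loop-invariant (non-constant)
// start adds no AddRecCost, while an addrec belonging to an unrelated
// sibling loop (unless it is already an existing phi) makes the whole cost
// an immediate loser.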
1479
1480/// Record this register in the set. If we haven't seen it before, rate
1481/// it. Optional LoserRegs provides a way to declare any formula that refers to
1482/// one of those regs an instant loser.
1483void Cost::RatePrimaryRegister(const Formula &F,constSCEV *Reg,
1484SmallPtrSetImpl<const SCEV *> &Regs,
1485SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1486if (LoserRegs && LoserRegs->count(Reg)) {
1487 Lose();
1488return;
1489 }
1490if (Regs.insert(Reg).second) {
1491 RateRegister(F, Reg, Regs);
1492if (LoserRegs && isLoser())
1493 LoserRegs->insert(Reg);
1494 }
1495}
1496
1497void Cost::RateFormula(const Formula &F,
1498SmallPtrSetImpl<const SCEV *> &Regs,
1499constDenseSet<const SCEV *> &VisitedRegs,
1500const LSRUse &LU,
1501SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1502if (isLoser())
1503return;
1504assert(F.isCanonical(*L) &&"Cost is accurate only for canonical formula");
1505// Tally up the registers.
1506unsigned PrevAddRecCost =C.AddRecCost;
1507unsigned PrevNumRegs =C.NumRegs;
1508unsigned PrevNumBaseAdds =C.NumBaseAdds;
1509if (constSCEV *ScaledReg =F.ScaledReg) {
1510if (VisitedRegs.count(ScaledReg)) {
1511 Lose();
1512return;
1513 }
1514 RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
1515if (isLoser())
1516return;
1517 }
1518for (constSCEV *BaseReg :F.BaseRegs) {
1519if (VisitedRegs.count(BaseReg)) {
1520 Lose();
1521return;
1522 }
1523 RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
1524if (isLoser())
1525return;
1526 }
1527
  // Determine how many (unfolded) adds we'll need inside the loop.
  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)
    // Do not count the base and a possible second register if the target
    // allows folding 2 registers.
    C.NumBaseAdds +=
        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  // Accumulate non-free scaling amounts.
  C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();

  // Tally up the non-zero immediates.
  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
      if (F.BaseGV)
        C.ImmCost += 64; // Handle symbolic values conservatively.
                         // TODO: This should probably be the pointer size.
      else if (Offset.isNonZero())
        C.ImmCost +=
            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();

      // Check with target if this offset with this instruction is
      // specifically not supported.
      if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
          !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
                                Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
        C.NumBaseAdds++;
    } else {
      // Incompatible immediate type; increase the cost to avoid using it.
      C.ImmCost += 2048;
    }
  }

  // If we don't count instruction cost, exit here.
  if (!InsnsCost) {
    assert(isValid() && "invalid cost");
    return;
  }

  // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
  // an additional instruction (at least a fill).
  // TODO: Do we need to distinguish register classes?
  unsigned TTIRegNum = TTI->getNumberOfRegisters(
                           TTI->getRegisterClassForType(false, F.getType())) - 1;
  if (C.NumRegs > TTIRegNum) {
    // If the cost already exceeded TTIRegNum, then only newly added registers
    // can add new instructions.
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
    else
      C.Insns += (C.NumRegs - TTIRegNum);
  }

  // If an ICmpZero formula does not end in 0, it cannot be replaced by just an
  // add or sub; we'll need to compare the final result of the AddRec, which
  // means we'll need an additional instruction. But if the target can
  // macro-fuse a compare with a branch, don't count this extra instruction.
  // For -10 + {0, +, 1}:
  //   i = i + 1;
  //   cmp i, 10
  //
  // For {-10, +, 1}:
  //   i = i + 1;
  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
      !TTI->canMacroFuseCmp())
    C.Insns++;
  // Each new AddRec adds 1 instruction to the calculation.
  C.Insns += (C.AddRecCost - PrevAddRecCost);

  // BaseAdds adds instructions for unfolded registers.
  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
  assert(isValid() && "invalid cost");
}
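
// Illustrative example (not part of the pass): on a hypothetical target whose
// relevant register class has 8 registers, TTIRegNum is 7. A formula that
// raises C.NumRegs from 6 to 9 is charged C.Insns += 9 - 7 = 2 (two likely
// fills); a later formula raising it from 9 to 10 is charged only for the one
// newly added register, since PrevNumRegs already exceeded TTIRegNum.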

/// Set this cost to a losing value.
void Cost::Lose() {
  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
}

/// Choose the lower cost.
bool Cost::isLess(const Cost &Other) const {
  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;
  return TTI->isLSRCostLess(C, Other.C);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::print(raw_ostream &OS) const {
  if (InsnsCost)
    OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
  if (C.ImmCost != 0)
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
}

LLVM_DUMP_METHOD void Cost::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Test whether this fixup always uses its value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  // PHI nodes use their value in their incoming blocks.
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
        return false;
    return true;
  }

  return !L->contains(UserInst);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {
  OS << "UserInst=";
  // Store is common and interesting enough to be worth special-casing.
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    OS << "store ";
    Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    UserInst->printAsOperand(OS, /*PrintType=*/false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  }

  if (Offset.isNonZero())
    OS << ", Offset=" << Offset;
}

LLVM_DUMP_METHOD void LSRFixup::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Test whether this use has a formula with the same registers as the given
/// formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  // Unstable sort by host order ok, because this is only used for uniquifying.
  llvm::sort(Key);
  return Uniquifier.count(Key);
}

/// Return the probability that a formula not referencing Reg will be selected
/// for this use.
float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  unsigned FNum = 0;
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
      FNum++;
  return ((float)(Formulae.size() - FNum)) / Formulae.size();
}

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise. The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)
    return false;

  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  // Unstable sort by host order ok, because this is only used for uniquifying.
  llvm::sort(Key);

  if (!Uniquifier.insert(Key).second)
    return false;

  // Using a register to hold the value of 0 is not profitable.
  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
#ifndef NDEBUG
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");
#endif

  // Add the formula to the list.
  Formulae.push_back(F);

  // Record registers now being used by this use.
  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
  if (F.ScaledReg)
    Regs.insert(F.ScaledReg);

  return true;
}

/// Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
    std::swap(F, Formulae.back());
  Formulae.pop_back();
}

/// Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  // Now that we've filtered out some formulae, recompute the Regs set.
  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
  Regs.clear();
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
  }

  // Update the RegTracker.
  for (const SCEV *S : OldRegs)
    if (!Regs.count(S))
      RegUses.dropRegister(S, LUIdx);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << "pointer"; // the full pointer type could be really verbose
    else {
      OS << *AccessTy.MemTy;
    }

    OS << " in addrspace(" << AccessTy.AddrSpace << ')';
  }

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';
    OS << Fixup.Offset;
    NeedComma = true;
  }
  OS << '}';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
}

LLVM_DUMP_METHOD void LSRUse::dump() const {
  print(errs()); errs() << '\n';
}
#endif

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup /* = nullptr */) {
  switch (Kind) {
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);
  }
  case LSRUse::ICmpZero:
    // There's not even a target hook for querying whether it would be legal to
    // fold a GV into an ICmp.
    if (BaseGV)
      return false;

    // ICmp only has two operands; don't allow more than two non-trivial parts.
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
      return false;

    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
    // putting the scaled register in the other operand of the icmp.
    if (Scale != 0 && Scale != -1)
      return false;

    // If we have low-level target information, ask the target if it can fold an
    // integer immediate on an icmp.
    if (BaseOffset.isNonZero()) {
      // We don't have an interface to query whether the target supports
      // icmpzero against scalable quantities yet.
      if (BaseOffset.isScalable())
        return false;

      // We have one of:
      // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
      // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
      // Offs is the ICmp immediate.
      if (Scale == 0)
        // The cast does the right thing with
        // std::numeric_limits<int64_t>::min().
        BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
      return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
    }

    // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
    return true;

  case LSRUse::Basic:
    // Only handle single-register values.
    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    // Special case Basic to handle -1 scales.
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
  }

  llvm_unreachable("Invalid LSRUse Kind!");
}
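
// Illustrative example (not part of the pass): for an ICmpZero use with
// Scale == 0 and a fixed BaseOffset of -16, the formula
//   ICmpZero {BaseReg + (-16)}
// can be lowered as "icmp eq BaseReg, 16", so the code above negates the
// offset and asks TTI.isLegalICmpImmediate(16); whether that immediate folds
// is entirely target-dependent.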

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))
    return false;
  // Check for overflow.
  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))
    return false;
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))
    return false;
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
                              HasBaseReg, Scale) &&
         isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
                              HasBaseReg, Scale);
}
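
// Worked example of the overflow check above (illustrative only): with
// Base = INT64_MAX and Min = 1, the wrapped sum Base + Min is INT64_MIN,
// which is not greater than Base even though Min > 0, so the two comparison
// results disagree and the function conservatively rejects the combination
// rather than reasoning about a wrapped offset range.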

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 const Formula &F, const Loop &L) {
  // For the purpose of isAMCompletelyFolded, either having a canonical formula
  // or a scale not equal to zero is correct.
  // Problems may arise from non-canonical formulae having a scale == 0.
  // Strictly speaking, it would be best to just rely on canonical formulae.
  // However, when we generate the scaled formulae, we first check that the
  // scaling factor is profitable before computing the actual ScaledReg for
  // compile-time's sake.
  assert((F.isCanonical(L) || F.Scale != 0));
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}

/// Test whether we know how to expand the current formula.
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, GlobalValue *BaseGV,
                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
  // We know how to expand completely foldable formulae.
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
                              BaseOffset, HasBaseReg, Scale) ||
         // Or formulae that use a base register produced by a sum of base
         // registers.
         (Scale == 1 &&
          isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                               BaseGV, BaseOffset, true, 0));
}

static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
}

static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
                                Immediate Offset) {
  if (Offset.isScalable())
    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());

  return TTI.isLegalAddImmediate(Offset.getFixedValue());
}

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 const LSRUse &LU, const Formula &F) {
  // Target may want to look at the user instructions.
  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
    for (const LSRFixup &Fixup : LU.Fixups)
      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
        return false;
    return true;
  }

  return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
                              F.Scale);
}

static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
                                            const LSRUse &LU, const Formula &F,
                                            const Loop &L) {
  if (!F.Scale)
    return 0;

  // If the use is not completely folded in that instruction, we will have to
  // pay an extra cost only for scale != 1.
  if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                            LU.AccessTy, F, L))
    return F.Scale != 1;

  switch (LU.Kind) {
  case LSRUse::Address: {
    // Check the scaling factor cost with both the min and max offsets.
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
    } else {
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
    }
    InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
        LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
    InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
        LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

    assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  }
  case LSRUse::ICmpZero:
  case LSRUse::Basic:
  case LSRUse::Special:
    // The use is completely folded, i.e., everything is folded into the
    // instruction.
    return 0;
  }

  llvm_unreachable("Invalid LSRUse Kind!");
}

static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                             LSRUse::KindType Kind, MemAccessTy AccessTy,
                             GlobalValue *BaseGV, Immediate BaseOffset,
                             bool HasBaseReg) {
  // Fast-path: zero is always foldable.
  if (BaseOffset.isZero() && !BaseGV)
    return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  // Canonicalize a scale of 1 to a base register if the formula doesn't
  // already have a base register.
  if (!HasBaseReg && Scale == 1) {
    Scale = 0;
    HasBaseReg = true;
  }

  // FIXME: Try with + without a scale? Maybe based on TTI?
  // I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
  // default for many architectures, not just AArch64 SVE. More investigation
  // needed later to determine if this should be used more widely than just
  // on scalable types.
  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
      AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
    Scale = 0;

  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
                              HasBaseReg, Scale);
}

static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                             ScalarEvolution &SE, Immediate MinOffset,
                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
                             bool HasBaseReg) {
  // Fast-path: zero is always foldable.
  if (S->isZero()) return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  Immediate BaseOffset = ExtractImmediate(S, SE);
  GlobalValue *BaseGV = ExtractSymbol(S, SE);

  // If there's anything else involved, it's not foldable.
  if (!S->isZero()) return false;

  // Fast-path: zero is always foldable.
  if (BaseOffset.isZero() && !BaseGV)
    return true;

  if (BaseOffset.isScalable())
    return false;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
                              BaseOffset, HasBaseReg, Scale);
}

namespace {

/// An individual increment in a Chain of IV increments. Relate an IV user to
/// an expression that computes the IV it uses from the IV used by the previous
/// link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
/// chain collection, before LSR replaces IV users. During chain generation,
/// IncExpr can be used to find the new IVOperand that computes the same
/// expression.
struct IVInc {
  Instruction *UserInst;
  Value *IVOperand;
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}
};

// The list of IV increments in program order. We typically add the head of a
// chain without finding subsequent links.
struct IVChain {
  SmallVector<IVInc, 1> Incs;
  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  using const_iterator = SmallVectorImpl<IVInc>::const_iterator;

  // Return the first increment in the chain.
  const_iterator begin() const {
    assert(!Incs.empty());
    return std::next(Incs.begin());
  }
  const_iterator end() const {
    return Incs.end();
  }

  // Returns true if this chain contains any increments.
  bool hasIncs() const { return Incs.size() >= 2; }

  // Add an IVInc to the end of this chain.
  void add(const IVInc &X) { Incs.push_back(X); }

  // Returns the last UserInst in the chain.
  Instruction *tailUserInst() const { return Incs.back().UserInst; }

  // Returns true if IncExpr can be profitably added to this chain.
  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
                             ScalarEvolution&);
};
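
// Illustrative sketch (not part of the pass): for a loop body that touches
// two related addresses,
//   p = base;       // head: IncExpr is the SCEV of p itself
//   load p
//   q = p + 16;     // link: IncExpr == 16
//   load q
// the chain holds Incs = [head(p), inc(q, +16)], and begin()/end() iterate
// only over the non-head links, here just the +16 increment.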

/// Helper for CollectChains to track multiple IV increment uses. Distinguish
/// between FarUsers that definitely cross IV increments and NearUsers that may
/// be used between IV increments.
struct ChainUsers {
  SmallPtrSet<Instruction*, 4> FarUsers;
  SmallPtrSet<Instruction*, 4> NearUsers;
};

/// This class holds state for the main loop strength reduction logic.
class LSRInstance {
  IVUsers &IU;
  ScalarEvolution &SE;
  DominatorTree &DT;
  LoopInfo &LI;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  Loop *const L;
  MemorySSAUpdater *MSSAU;
  TTI::AddressingModeKind AMK;
  mutable SCEVExpander Rewriter;
  bool Changed = false;

  /// This is the insert position at which the current loop's induction
  /// variable increment should be placed. In simple loops, this is the latch
  /// block's terminator. But in more complicated cases, this is a position
  /// which will dominate all the in-loop post-increment users.
  Instruction *IVIncInsertPos = nullptr;

  /// Interesting factors between use strides.
  ///
  /// We explicitly use a SetVector which contains a SmallSet, instead of the
  /// default, a SmallDenseSet, because we need to use the full range of
  /// int64_ts, and there's currently no good way of doing that with
  /// SmallDenseSet.
  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  /// The cost of the current SCEV; the best solution found by LSR will be
  /// dropped if it is not more profitable than this baseline.
  Cost BaselineCost;

  /// Interesting use types, to facilitate truncation reuse.
  SmallSetVector<Type *, 4> Types;

  /// The list of interesting uses.
  mutable SmallVector<LSRUse, 16> Uses;

  /// Track which uses use which register candidates.
  RegUseTracker RegUses;

  // Limit the number of chains to avoid quadratic behavior. We don't expect to
  // have more than a few IV increment chains in a loop. Missing a Chain falls
  // back to normal LSR behavior for those uses.
  static const unsigned MaxChains = 8;

  /// IV users can form a chain of IV increments.
  SmallVector<IVChain, MaxChains> IVChainVec;

  /// IV users that belong to profitable IVChains.
  SmallPtrSet<Use*, MaxChains> IVIncSet;

  /// Induction variables that were generated and inserted by the SCEV
  /// Expander.
  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  // Inserting instructions in the loop and using them as PHI's input could
  // break LCSSA in case the PHI's parent block is not a loop exit (i.e. the
  // corresponding incoming block is not loop exiting). So collect all such
  // instructions to form LCSSA for them later.
  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  void OptimizeLoopTermCond();

  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  // Support for sharing of LSRUses between LSRFixups.
  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
  UseMapTy UseMap;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);

  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    Cost &SolutionCost,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

  BasicBlock::iterator
  HoistInsertPosition(BasicBlock::iterator IP,
                      const SmallVectorImpl<Instruction *> &Inputs) const;
  BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
                                                     const LSRFixup &LF,
                                                     const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                BasicBlock::iterator IP,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     const Formula &F,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

public:
  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
              TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;
  }

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// If IV is used in an int-to-float cast inside the loop then try to eliminate
/// the cast operation.
void LSRInstance::OptimizeShadowIV() {
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
       UI != E; /* empty */) {
    IVUsers::const_iterator CandidateUI = UI;
    ++UI;
    Instruction *ShadowUse = CandidateUI->getUser();
    Type *DestTy = nullptr;
    bool IsSigned = false;

    /* If shadow use is an int->float cast then insert a second IV
       to eliminate this cast.

         for (unsigned i = 0; i < n; ++i)
           foo((double)i);

       is transformed into

         double d = 0.0;
         for (unsigned i = 0; i < n; ++i, ++d)
           foo(d);
    */
    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      IsSigned = false;
      DestTy = UCast->getDestTy();
    }
    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      IsSigned = true;
      DestTy = SCast->getDestTy();
    }
    if (!DestTy) continue;

    // If target does not support DestTy natively then do not apply
    // this transformation.
    if (!TTI.isTypeLegal(DestTy)) continue;

    PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
    if (!PH) continue;
    if (PH->getNumIncomingValues() != 2) continue;

    // If the calculation in integers overflows, the result in FP type will
    // differ. So we can only do this transformation if we are guaranteed not
    // to deal with overflowing values.
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
    if (!AR) continue;
    if (IsSigned && !AR->hasNoSignedWrap()) continue;
    if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;

    Type *SrcTy = PH->getType();
    int Mantissa = DestTy->getFPMantissaWidth();
    if (Mantissa == -1) continue;
    if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
      continue;

    unsigned Entry, Latch;
    if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
      Entry = 0;
      Latch = 1;
    } else {
      Entry = 1;
      Latch = 0;
    }

    ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)
      continue;

    /* Initialize new IV, double d = 0.0 in above example. */
    ConstantInt *C = nullptr;
    if (Incr->getOperand(0) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(1));
    else if (Incr->getOperand(1) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(0));
    else
      continue;

    if (!C) continue;

    // Ignore negative constants, as the code below doesn't handle them
    // correctly. TODO: Remove this restriction.
    if (!C->getValue().isStrictlyPositive())
      continue;

    /* Add new PHINode. */
    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());
    NewPH->setDebugLoc(PH->getDebugLoc());

    /* create new increment. '++d' in above example. */
    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
    BinaryOperator *NewIncr = BinaryOperator::Create(
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,
        NewPH, CFP, "IV.S.next.", Incr->getIterator());
    NewIncr->setDebugLoc(Incr->getDebugLoc());

    NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
    NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

    /* Remove cast operation */
    ShadowUse->replaceAllUsesWith(NewPH);
    ShadowUse->eraseFromParent();
    Changed = true;
    break;
  }
}

/// If Cond has an operand that is an expression of an IV, set the IV user and
/// stride information and return true, otherwise return false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &U : IU)
    if (U.getUser() == Cond) {
      // NOTE: we could handle setcc instructions with multiple uses here, but
      // InstCombine does it as well for simple uses, it's not clear that it
      // occurs enough in real life to handle.
      CondUse = &U;
      return true;
    }
  return false;
}

/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  // Check for a max calculation that matches the pattern. There's no check
  // for ICMP_ULE here because the comparison would be with zero, which
  // isn't interesting.
  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  const SCEVNAryExpr *Max = nullptr;
  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
    Max = S;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
    Max = S;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;
    Max = U;
  } else {
    // No match; bail.
    return Cond;
  }

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

  // ScalarEvolution canonicalizes constants to the left. For < and >, look
  // for a comparison with 1. For <= and >=, a comparison with zero.
  if (!MaxLHS ||
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
    return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
    // Look for n+1, and grab n.
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (!NewRHS)
      return Cond;
  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();
  else
    // Max doesn't match expected pattern.
    return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  NewCond->setDebugLoc(Cond->getDebugLoc());
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
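
// Illustrative IR sketch (not part of the pass): with the pattern above,
//   %max  = select i1 %cmp0, i64 1, i64 %n   ; max(1, n) created by indvars
//   %exit = icmp ne i64 %i.next, %max
// is rewritten to
//   %scmp = icmp slt i64 %i.next, %n
// and the select (plus its feeding icmp, if now dead) is erased, saving the
// max computation, which is especially valuable inside an outer loop.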

/// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
  SmallPtrSet<Instruction *, 4> PostIncs;

  // We need a different set of heuristics for rotated and non-rotated loops.
  // If a loop is rotated then the latch is also the backedge, so inserting
  // post-inc expressions just before the latch is ideal. To reduce live ranges
  // it also makes sense to rewrite terminating conditions to use post-inc
  // expressions.
  //
  // If the loop is not rotated then the latch is not a backedge; the latch
  // check is done in the loop head. Adding post-inc expressions before the
  // latch will cause overlapping live-ranges of pre-inc and post-inc
  // expressions in the loop body. In this case we do *not* want to use post-inc
  // expressions in the latch check, and we want to insert post-inc expressions
  // before the backedge.
  BasicBlock *LatchBlock = L->getLoopLatch();
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (!llvm::is_contained(ExitingBlocks, LatchBlock)) {
    // The backedge doesn't exit the loop; treat this as a head-tested loop.
    IVIncInsertPos = LatchBlock->getTerminator();
    return;
  }

  // Otherwise treat this as a rotated loop.
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    // Get the terminating condition for the loop if possible. If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative, termination condition could be an 'or' etc..
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = nullptr;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse))
      continue;

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    // One consequence of doing this now is that it disrupts the count-down
    // optimization. That's not always a bad thing though, because in such
    // cases it may still be worthwhile to avoid a max.
    Cond = OptimizeMax(Cond, CondUse);

    // If this exiting block dominates the latch block, it may also use
    // the post-inc value if it won't be shared with other uses.
    // Check for dominance.
    if (!DT.dominates(ExitingBlock, LatchBlock))
      continue;

    // Conservatively avoid trying to use the post-inc value in non-latch
    // exits if there may be pre-inc users in intervening blocks.
    if (LatchBlock != ExitingBlock)
      for (const IVStrideUse &UI : IU)
        // Test if the use is reachable from the exiting block. This dominator
        // query is a conservative approximation of reachability.
        if (&UI != CondUse &&
            !DT.properlyDominates(UI.getUser()->getParent(), ExitingBlock)) {
          // Conservatively assume there may be reuse if the quotient of their
          // strides could be a legal scale.
          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(UI, L);
          if (!A || !B) continue;
          if (SE.getTypeSizeInBits(A->getType()) !=
              SE.getTypeSizeInBits(B->getType())) {
            if (SE.getTypeSizeInBits(A->getType()) >
                SE.getTypeSizeInBits(B->getType()))
              B = SE.getSignExtendExpr(B, A->getType());
            else
              A = SE.getSignExtendExpr(A, B->getType());
          }
          if (const SCEVConstant *D =
                  dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
            const ConstantInt *C = D->getValue();
            // Stride of one or negative one can have reuse with non-addresses.
            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            // Avoid weird situations.
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
            // Check for possible scaled-address reuse.
            if (isAddressUse(TTI, UI.getUser(), UI.getOperandValToReplace())) {
              MemAccessTy AccessTy =
                  getAccessType(TTI, UI.getUser(), UI.getOperandValToReplace());
              int64_t Scale = C->getSExtValue();
              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                            /*BaseOffset=*/0,
                                            /*HasBaseReg=*/true, Scale,
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
              Scale = -Scale;
              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                            /*BaseOffset=*/0,
                                            /*HasBaseReg=*/true, Scale,
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
            }
          }
        }

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
                      << *Cond << '\n');

    // It's possible for the setcc instruction to be anywhere in the loop, and
    // possible for it to have multiple users. If it is not immediately before
    // the exiting block branch, move it.
    if (Cond->getNextNonDebugInstruction() != TermBr) {
      if (Cond->hasOneUse()) {
        Cond->moveBefore(TermBr->getIterator());
      } else {
        // Clone the terminating condition and insert into the loopend.
        ICmpInst *OldCond = Cond;
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");
        Cond->insertInto(ExitingBlock, TermBr->getIterator());

        // Clone the IVUse, as the old use still exists!
        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
        TermBr->replaceUsesOfWith(OldCond, Cond);
      }
    }

    // If we get to here, we know that we can transform the setcc instruction to
    // use the post-incremented version of the IV, allowing us to coalesce the
    // live ranges for the IV correctly.
    CondUse->transformToPostInc(L);
    Changed = true;

    PostIncs.insert(Cond);
  decline_post_inc:;
  }

  // Determine an insertion point for the loop induction variable increment. It
  // must dominate all the post-inc comparisons we just set up, and it must
  // dominate the loop latch edge.
  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (Instruction *Inst : PostIncs)
    IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
}

/// Determine if the given use can accommodate a fixup at the given offset and
/// other details. If so, update the use and return true.
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
  // something conservative; however, this can pessimize in the case that one
  // of the uses will have all its uses outside the loop, for example.
  if (LU.Kind != Kind)
    return false;

  // Check for a mismatched access type, and fall back conservatively as needed.
  // TODO: Be less conservative when the type is similar and can use the same
  // addressing modes.
  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);
    }
  }

  // Conservatively assume HasBaseReg is true for now.
  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          LU.MaxOffset - NewOffset, HasBaseReg))
      return false;
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          NewOffset - LU.MinOffset, HasBaseReg))
      return false;
    NewMaxOffset = NewOffset;
  }

  // FIXME: We should be able to handle some level of scalable offset support
  // for 'void', but in order to get basic support up and running this is
  // being left out.
  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
    return false;

  // Update the use.
  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
  return true;
}

/// Return an LSRUse index and an offset value for a fixup which needs the given
/// expression, with the given kind and optional access type. Either reuse an
/// existing use or create a new one, as needed.
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;
  Immediate Offset = ExtractImmediate(Expr, SE);

  // Basic uses can't accept any offset, for example.
  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/nullptr,
                        Offset, /*HasBaseReg=*/true)) {
    Expr = Copy;
    Offset = Immediate::getFixed(0);
  }

  std::pair<UseMapTy::iterator, bool> P =
      UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
  if (!P.second) {
    // A use already existed with this base.
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
      // Reuse this use.
      return std::make_pair(LUIdx, Offset);
  }

  // Create a new use.
  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  LU.MinOffset = Offset;
  LU.MaxOffset = Offset;
  return std::make_pair(LUIdx, Offset);
}

/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
    std::swap(LU, Uses.back());
  Uses.pop_back();

  // Update RegUses.
  RegUses.swapAndDropUse(LUIdx, Uses.size());
}

/// Look for a use distinct from OrigLU which has a formula with the same
/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  // Search all uses for the formula. This could be more clever.
  for (LSRUse &LU : Uses) {
    // Check whether this use is close enough to OrigLU, to see whether it's
    // worthwhile looking through its formulae.
    // Ignore ICmpZero uses because they may contain formulae generated by
    // GenerateICmpZeroScales, in which case adding fixup offsets may
    // be invalid.
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      // Scan through this use's formulae.
      for (const Formula &F : LU.Formulae) {
        // Check to see if this formula has the same registers and symbols
        // as OrigF.
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
            return &LU;
          // This is the formula where all the registers and symbols matched;
          // there aren't going to be any others. Since we declined it, we
          // can skip the rest of the formulae and proceed to the next LSRUse.
          break;
        }
      }
    }
  }

  // Nothing looked good.
  return nullptr;
}

void LSRInstance::CollectInterestingTypesAndFactors() {
  SmallSetVector<const SCEV *, 4> Strides;

  // Collect interesting types and strides.
  SmallVector<const SCEV *, 4> Worklist;
  for (const IVStrideUse &U : IU) {
    const SCEV *Expr = IU.getExpr(U);
    if (!Expr)
      continue;

    // Collect interesting types.
    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));

    // Add strides for mentioned loops.
    Worklist.push_back(Expr);
    do {
      const SCEV *S = Worklist.pop_back_val();
      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
        if (AR->getLoop() == L)
          Strides.insert(AR->getStepRecurrence(SE));
        Worklist.push_back(AR->getStart());
      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
        append_range(Worklist, Add->operands());
      }
    } while (!Worklist.empty());
  }

  // Compute interesting factors from the set of interesting strides.
  for (SmallSetVector<const SCEV *, 4>::const_iterator
       I = Strides.begin(), E = Strides.end(); I != E; ++I)
    for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

      if (SE.getTypeSizeInBits(OldStride->getType()) !=
          SE.getTypeSizeInBits(NewStride->getType())) {
        if (SE.getTypeSizeInBits(OldStride->getType()) >
            SE.getTypeSizeInBits(NewStride->getType()))
          NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
        else
          OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
      }
      if (const SCEVConstant *Factor =
              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
                                                          SE, true))) {
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
      } else if (const SCEVConstant *Factor =
                     dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
                                                                 NewStride,
                                                                 SE, true))) {
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
      }
    }

  // If all uses use the same type, don't bother looking for truncation-based
  // reuse.
  if (Types.size() == 1)
    Types.clear();

  LLVM_DEBUG(print_factors_and_types(dbgs()));
}

/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
/// IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
              Loop *L, ScalarEvolution &SE) {
  for (; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
      if (!SE.isSCEVable(Oper->getType()))
        continue;

      if (const SCEVAddRecExpr *AR =
              dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
        if (AR->getLoop() == L)
          break;
      }
    }
  }
  return OI;
}

/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);
  return Oper;
}

/// Return an approximation of this SCEV expression's "base", or NULL for any
/// constant. Returning the expression itself is conservative. Returning a
/// deeper subexpression is more precise and valid as long as it isn't less
/// complex than another subexpression. For expressions involving multiple
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) {
  switch (S->getSCEVType()) {
  default: // including scUnknown.
    return S;
  case scConstant:
  case scVScale:
    return nullptr;
  case scTruncate:
    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
  case scZeroExtend:
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
  case scSignExtend:
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
  case scAddExpr: {
    // Skip over scaled operands (scMulExpr) to follow add operands as long as
    // there's nothing more complex.
    // FIXME: not sure if we want to recognize negation.
    const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
    for (const SCEV *SubExpr : reverse(Add->operands())) {
      if (SubExpr->getSCEVType() == scAddExpr)
        return getExprBase(SubExpr);

      if (SubExpr->getSCEVType() != scMulExpr)
        return SubExpr;
    }
    return S; // all operands are scaled, be conservative.
  }
  case scAddRecExpr:
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
  }
  llvm_unreachable("Unknown SCEV kind!");
}
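
// Illustrative example (not part of the pass): for an address expression that
// SCEV prints roughly as (16 + (4 * %i) + %base), walking the add operands
// from the right encounters %base (a SCEVUnknown) first, which is neither an
// add nor a mul, so getExprBase returns %base; the scaled (4 * %i) term and
// the constant are skipped. Two accesses off the same %base therefore share
// one chain base.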

/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as chain increment as long as it's not obviously
/// expensive to expand using real instructions.
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {
  // Aggressively form chains when -stress-ivchain.
  if (StressIVChain)
    return true;

  // Do not replace a constant offset from IV head with a nonconstant IV
  // increment.
  if (!isa<SCEVConstant>(IncExpr)) {
    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
      return false;
  }

  SmallPtrSet<const SCEV*, 8> Processed;
  return !isHighCostExpansion(IncExpr, Processed, SE);
}

/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in another chain.
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction *> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  if (StressIVChain)
    return true;

  if (!Chain.hasIncs())
    return false;

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << "  " << *Inst << "\n";
               });
    return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  // The chain itself may require a register, so initialize cost to 1.
  int cost = 1;

  // A complete chain likely eliminates the need for keeping the original IV in
  // a register. LSR does not currently know how to form a complete chain unless
  // the header phi already exists.
  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
    --cost;
  }
  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
    return true;

  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
      return true;
    if (Inc.IncExpr->isZero())
      continue;

    // Incrementing by zero or some constant is neutral. We assume constants can
    // be folded into an addressing mode or an add's immediate operand.
    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;
      continue;
    }

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    else
      ++NumVarIncrements;

    LastIncExpr = Inc.IncExpr;
  }
  // An IV chain with a single increment is handled by LSR's postinc
  // uses. However, a chain with multiple increments requires keeping the IV's
  // value live longer than it needs to be if chained.
  if (NumConstIncrements > 1)
    --cost;

  // Materializing increment expressions in the preheader that didn't exist in
  // the original code may cost a register. For example, sign-extended array
  // indices can produce ridiculous increments like this:
  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
  cost += NumVarIncrements;

  // Reusing variable increments likely saves a register to hold the multiple of
  // the stride.
  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
                    << "\n");

  return cost < 0;
}
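
// A worked example of the cost arithmetic above (hypothetical chain): four
// increments, two by constants and two by the loop-invariant value %stride.
//
//   cost = 1                  // the chain's own register
//   NumConstIncrements = 2    // > 1, so --cost       => 0
//   NumVarIncrements   = 1    // first use of %stride => 1
//   NumReusedIncrements = 1   // %stride reused       => 0
//
// The final cost of 0 is not < 0, so this chain would be rejected; one more
// reuse of %stride would tip it to profitable.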

/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  // When IVs are used as types of varying widths, they are generally converted
  // to a wider type with some uses remaining narrow under a (free) trunc.
  Value *const NextIV = getWideOperand(IVOper);
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  // Visit all existing chains. Check if its IVOper can be computed as a
  // profitable loop invariant increment from the last link in the Chain.
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    // Prune the solution space aggressively by checking that both IV operands
    // are expressions that operate on the same unscaled SCEVUnknown. This
    // "base" will be canceled by the subsequent getMinusSCEV call. Checking
    // first avoids creating extra SCEV expressions.
    if (!StressIVChain && Chain.ExprBase != OperExprBase)
      continue;

    Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
    if (PrevIV->getType() != NextIV->getType())
      continue;

    // A phi node terminates a chain.
    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
      continue;

    // The increment must be loop-invariant so it can be kept in a register.
    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
      continue;

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;
      break;
    }
  }
  // If we haven't found a chain, create a new one, unless we hit the max. Don't
  // bother for phi nodes, because they must be last in the chain.
  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))
      return;
    if (NChains >= MaxChains && !StressIVChain) {
      LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
      return;
    }
    LastIncExpr = OperExpr;
    // IVUsers may have skipped over sign/zero extensions. We don't currently
    // attempt to form chains involving extensions unless they can be hoisted
    // into this loop's AddRec.
    if (!isa<SCEVAddRecExpr>(LastIncExpr))
      return;
    ++NChains;
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
                                 OperExprBase));
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
  } else {
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    // Add this IV user to the end of the chain.
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  }
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction *, 4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  // This chain's NearUsers become FarUsers.
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
                                            NearUsers.end());
    NearUsers.clear();
  }

  // All other uses of IVOperand become near uses of the chain.
  // We currently ignore intermediate values within SCEV expressions, assuming
  // they will eventually be used by the current chain, or can be computed
  // from one of the chain increments. To be more precise we could
  // transitively follow its user and only add leaf IV users to the set.
  for (User *U : IVOper->users()) {
    Instruction *OtherUse = dyn_cast<Instruction>(U);
    if (!OtherUse)
      continue;
    // Uses in the chain will no longer be uses if the chain is formed.
    // Include the head of the chain in this iteration (not Chain.begin()).
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for (; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
        break;
    }
    if (IncIter != IncEnd)
      continue;

    if (SE.isSCEVable(OtherUse->getType())
        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {
      continue;
    }
    NearUsers.insert(OtherUse);
  }

  // Since this user is part of the chain, it's no longer considered a use
  // of the chain.
  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
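
// For illustration, a hypothetical loop body where a chain forms:
//
//   %i  = phi i64 [ 0, %ph ], [ %i.next, %latch ]
//   %p0 = getelementptr i32, ptr %base, i64 %i
//   %v0 = load i32, ptr %p0                       ; chain head, {%base,+,4}
//   %p1 = getelementptr i32, ptr %p0, i64 1
//   %v1 = load i32, ptr %p1                       ; OperExpr - PrevExpr = 4
//
// Both operands share the getExprBase() base %base, and the increment folds
// to the loop-invariant constant 4, so %v1 is appended to the chain headed by
// %v0 rather than starting a chain of its own.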

/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
/// this. However, such targets should probably disable LSR altogether.
///
/// The job of LSR is to make a reasonable choice of induction variables across
/// the loop. Subsequent passes can easily "unchain" computation exposing more
/// ILP *within the loop* if the target wants it.
///
/// Finding the best IV chain is potentially a scheduling problem. Since LSR
/// will not reorder memory operations, it will recognize this as a chain, but
/// will generate redundant IV increments. Ideally this would be corrected later
/// by a smart scheduler:
///        = A[i]
///        = A[i+x]
/// A[i]   =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
  LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
  SmallVector<ChainUsers, 8> ChainUsersVec;

  SmallVector<BasicBlock *, 8> LatchPath;
  BasicBlock *LoopHeader = L->getHeader();
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
    LatchPath.push_back(Rung->getBlock());
  }
  LatchPath.push_back(LoopHeader);

  // Walk the instruction stream from the loop header to the loop latch.
  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      // Skip instructions that weren't seen by IVUsers analysis.
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
        continue;

      // Ignore users that are part of a SCEV expression. This way we only
      // consider leaf IV Users. This effectively rediscovers a portion of
      // IVUsers analysis but in program order this time.
      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
        continue;

      // Remove this instruction from any NearUsers set it may be in.
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
      }
      // Search for operands that can be chained.
      SmallPtrSet<Instruction *, 4> UniqueOperands;
      User::op_iterator IVOpEnd = I.op_end();
      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
      }
    } // Continue walking down the instructions.
  } // Continue walking down the domtree.
  // Visit phi backedges to determine if the chain can generate the IV postinc.
  for (PHINode &PN : L->getHeader()->phis()) {
    if (!SE.isSCEVable(PN.getType()))
      continue;

    Instruction *IncV =
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
    if (IncV)
      ChainInstruction(&PN, IncV, ChainUsersVec);
  }
  // Remove any unprofitable chains.
  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
    if (!isProfitableChain(IVChainVec[UsersIdx],
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
      continue;
    // Preserve the chain at UsersIdx.
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
    ++ChainIdx;
  }
  IVChainVec.resize(ChainIdx);
}

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    LLVM_DEBUG(dbgs() << "        Inc: " << *Inc.UserInst << "\n");
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  }
}

/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
                             Value *Operand, const TargetTransformInfo &TTI) {
  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  Immediate IncOffset = Immediate::getZero();
  if (IncConst) {
    if (IncConst->getAPInt().getSignificantBits() > 64)
      return false;
    IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
  } else {
    // Look for mul(vscale, constant), to detect a scalable offset.
    auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
    if (!IncVScale || IncVScale->getNumOperands() != 2 ||
        !isa<SCEVVScale>(IncVScale->getOperand(1)))
      return false;
    auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
    if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
      return false;
    IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
  }

  if (!isAddressUse(TTI, UserInst, Operand))
    return false;

  MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
  if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
                        IncOffset, /*HasBaseReg=*/false))
    return false;

  return true;
}
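
// A hedged sketch of the scalable case above: with scalable vectors, an
// increment of, say, 4 * vscale bytes appears as the SCEV mul(vscale, 4) and
// becomes Immediate::getScalable(4). In hypothetical IR the increment might
// be materialized as:
//
//   %vs  = call i64 @llvm.vscale.i64()
//   %off = shl i64 %vs, 2        ; 4 * vscale
//
// Whether such an offset actually folds is still up to the target's
// isAlwaysFoldable() answer for the access type.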

/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // Find the new IVOperand for the head of the chain. It may have been replaced
  // by LSR.
  const IVInc &Head = Chain.Incs[0];
  User::op_iterator IVOpEnd = Head.UserInst->op_end();
  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
                                             IVOpEnd, L, SE);
  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    IVSrc = getWideOperand(*IVOpIter);

    // If this operand computes the expression that the chain needs, we may use
    // it. (Check this after setting IVSrc which is used below.)
    //
    // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
    // narrow for the chain, so we can no longer use it. We do allow using a
    // wider phi, assuming LSR checked for free truncation. In that case we
    // should already have a truncate on this operand such that
    // getSCEV(IVSrc) == IncExpr.
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
      break;
    }
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  }
  if (IVOpIter == IVOpEnd) {
    // Gracefully give up on this chain.
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
    return;
  }
  assert(IVSrc && "Failed to find IV chain source");

  LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
  Type *IVTy = IVSrc->getType();
  Type *IntTy = SE.getEffectiveSCEVType(IVTy);
  const SCEV *LeftOverExpr = nullptr;
  const SCEV *Accum = SE.getZero(IntTy);
  SmallVector<std::pair<const SCEV *, Value *>> Bases;
  Bases.emplace_back(Accum, IVSrc);

  for (const IVInc &Inc : Chain) {
    Instruction *InsertPt = Inc.UserInst;
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    // IVOper will replace the current IV User's operand. IVSrc is the IV
    // value currently held in a register.
    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      // IncExpr was the result of subtraction of two narrow values, so must
      // be signed.
      const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
      Accum = SE.getAddExpr(Accum, IncExpr);
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    }

    // Look through each base to see if any can produce a nice addressing mode.
    bool FoundBase = false;
    for (auto [MapScev, MapIVOper] : reverse(Bases)) {
      const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
      if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) {
        if (!Remainder->isZero()) {
          Rewriter.clearPostInc();
          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =
              SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV));
          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
        } else {
          IVOper = MapIVOper;
        }

        FoundBase = true;
        break;
      }
    }
    if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
      // Expand the IV increment.
      Rewriter.clearPostInc();
      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
      const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
                                             SE.getUnknown(IncV));
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      // If an IV increment can't be folded, use it as the next IV value.
      if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
        Bases.emplace_back(Accum, IVOper);
        IVSrc = IVOper;
        LeftOverExpr = nullptr;
      }
    }
    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
             "cannot extend a chained IV");
      IRBuilder<> Builder(InsertPt);
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    }
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
      DeadInsts.emplace_back(OperandIsInstr);
  }
  // If LSR created a new, wider phi, we may also replace its postinc. We only
  // do this if we also found a wide value for the head of the chain.
  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
      if (Phi.getType() != IVSrc->getType())
        continue;
      Instruction *PostIncV = dyn_cast<Instruction>(
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
        continue;
      Value *IVOper = IVSrc;
      Type *PostIncTy = PostIncV->getType();
      if (IVTy != PostIncTy) {
        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      }
      Phi.replaceUsesOfWith(PostIncV, IVOper);
      DeadInsts.emplace_back(PostIncV);
    }
  }
}
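
// For illustration, assuming a hypothetical chain of three loads stepping by
// a loop-invariant %stride that the target cannot fold into its addressing
// modes, the rewrite materializes each operand from the previous one:
//
//   %a0 = load i32, ptr %p                          ; IVSrc
//   %p1 = getelementptr i8, ptr %p, i64 %stride     ; "lsr.chain"
//   %a1 = load i32, ptr %p1
//   %p2 = getelementptr i8, ptr %p1, i64 %stride    ; "lsr.chain"
//   %a2 = load i32, ptr %p2
//
// One register then carries the chain instead of each user recomputing its
// address from %p; the expander may emit adds, GEPs, or casts depending on
// the IV type.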

void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  // For calculating baseline cost
  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {
    Instruction *UserInst = U.getUser();
    // Skip IV users that are part of profitable IV Chains.
    User::op_iterator UseI =
        find(UserInst->operands(), U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
      continue;
    }

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
      Kind = LSRUse::Address;
      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
    }

    const SCEV *S = IU.getExpr(U);
    if (!S)
      continue;
    PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
    // with rather than just N or i, so we can consider the register
    // requirements for both N and i at the same time. Limiting this code to
    // equality icmps is not a problem because all interesting loops use
    // equality icmps, thanks to IndVarSimplify.
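    //
    // For illustration (hypothetical loop): a latch compare such as
    //
    //   %c = icmp eq i64 %i.next, %n
    //
    // is modeled as the ICmpZero use (%n - %i.next) == 0, so LSR can weigh
    // register pressure for %n and the IV together, and later scale the whole
    // comparison (see GenerateICmpZeroScales).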
    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      // If CI can be saved in some target, e.g. replaced inside a hardware
      // loop on PowerPC, there is no need to generate initial formulae for it.
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
        continue;
      if (CI->isEquality()) {
        // Swap the operands if needed to put the OperandValToReplace on the
        // left, for consistency.
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
          Changed = true;
        }

        // x == y  -->  x - y == 0
        const SCEV *N = SE.getSCEV(NV);
        if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
            (!NV->getType()->isPointerTy() ||
             SE.getPointerBase(N) == SE.getPointerBase(S))) {
          // S is normalized, so normalize N before folding it into S
          // to keep the result normalized.
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          if (!N)
            continue;
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          // If we can't generally expand the expression (e.g. it contains
          // a divide), but it is already at a loop invariant point before the
          // loop, wrap it in an unknown (to prevent the expander from trying
          // to re-expand in a potentially unsafe way.) The restriction to
          // integer types is required because the unknown hides the base, and
          // SCEV can't compute the difference of two unknown pointers.
          N = SE.getUnknown(NV);
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          if (!N)
            continue;
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
          assert(!isa<SCEVCouldNotCompute>(S));
        }

        // -1 and the negations of all interesting strides (except the negation
        // of -1) are now also interesting.
        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);
        Factors.insert(-1);
      }
    }

    // Get or create an LSRUse.
    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    Immediate Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    // Record the fixup.
    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LF.Offset = Offset;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    // Create SCEV as Formula for calculating baseline cost
    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      Formula F;
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);
    }

    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }

  LLVM_DEBUG(print_fixups(dbgs()));
}

/// Insert a formula for the given expression into the given use, separating out
/// loop-variant portions from loop-invariant and loop-computable portions.
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
                                       size_t LUIdx) {
  // Mark uses whose expressions cannot be expanded.
  if (!Rewriter.isSafeToExpand(S))
    LU.RigidFormula = true;

  Formula F;
  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

/// Insert a simple single-register formula for the given expression into the
/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  if (F.ScaledReg)
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);
}

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  // Do not insert a formula that we will not be able to expand.
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))
    return false;

  CountRegisters(F, LUIdx);
  return true;
}

/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
  SmallPtrSet<const SCEV *, 32> Visited;

  // Don't collect outside uses if we are favoring postinc - the instructions in
  // the loop are more important than the ones outside of it.
  if (AMK == TTI::AMK_PostIndexed)
    return;

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    // Don't process the same SCEV twice
    if (!Visited.insert(S).second)
      continue;

    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      append_range(Worklist, N->operands());
    else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        // Look for instructions defined outside the loop.
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))
        // Constants can be re-materialized.
        continue;
      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Don't bother if the instruction is an EHPad.
        if (UserInst->isEHPad())
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Don't bother if the instruction is in a BB which ends in an EHPad.
        if (UseBB->getTerminator()->isEHPad())
          continue;

        // Ignore cases in which the currently-examined value could come from
        // a basic block terminated with an EHPad. This checks all incoming
        // blocks of the phi node since it is possible that the same incoming
        // value comes from multiple basic blocks, only some of which may end
        // in an EHPad. If any of them do, a subsequent rewrite attempt by this
        // pass would try to insert instructions into an EHPad, hitting an
        // assertion.
        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          llvm::Value *ExpectedValue = U;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
                break;
              }
            }
          }
          if (HasIncompatibleEHPTerminatedBlock) {
            continue;
          }
        }

        // Don't bother rewriting PHIs in catchswitch blocks.
        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType())) {
          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          // If the user is a no-op, look through to its uses.
          if (!isa<SCEVUnknown>(UserS))
            continue;
          if (UserS == US) {
            Worklist.push_back(
                SE.getUnknown(const_cast<Instruction *>(UserInst)));
            continue;
          }
        }
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
            continue;
        }

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        Immediate Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LF.Offset = Offset;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
            SE.getTypeSizeInBits(LU.WidestFixupType) <
            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        break;
      }
    }
  }
}

/// Split S into subexpressions which can be pulled out into separate
/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                                   SmallVectorImpl<const SCEV *> &Ops,
                                   const Loop *L,
                                   ScalarEvolution &SE,
                                   unsigned Depth = 0) {
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return S;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // Break out add operands.
    for (const SCEV *S : Add->operands()) {
      const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
    }
    return nullptr;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Split a non-zero base out of an addrec.
    if (AR->getStart()->isZero() || !AR->isAffine())
      return S;

    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
                                            C, Ops, L, SE, Depth+1);
    // Split the non-zero AddRec unless it is part of a nested recurrence that
    // does not pertain to this loop.
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;
    }
    if (Remainder != AR->getStart()) {
      if (!Remainder)
        Remainder = SE.getConstant(AR->getType(), 0);
      return SE.getAddRecExpr(Remainder,
                              AR->getStepRecurrence(SE),
                              AR->getLoop(),
                              //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                              SCEV::FlagAnyWrap);
    }
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    // Break (C * (a + b + c)) into C*a + C*b + C*c.
    if (Mul->getNumOperands() != 2)
      return S;
    if (const SCEVConstant *Op0 =
            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
          CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(SE.getMulExpr(C, Remainder));
      return nullptr;
    }
  }
  return S;
}
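
// A worked example of the decomposition above (hypothetical SCEV): for
//
//   S = ((4 * %a) + %b + {0,+,8}<%loop>)
//
// CollectSubexprs pushes (4 * %a), %b, and {0,+,8}<%loop> onto Ops and
// returns null (the sum is fully captured), letting the caller consider each
// piece as its own base register in GenerateReassociations.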

/// Return true if the SCEV represents a value that may end up as a
/// post-increment operation.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
                              LSRUse &LU, const SCEV *S, const Loop *L,
                              ScalarEvolution &SE) {
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())
    return false;
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR)
    return false;
  const SCEV *LoopStep = AR->getStepRecurrence(SE);
  if (!isa<SCEVConstant>(LoopStep))
    return false;
  // Check if a post-indexed load/store can be used.
  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
    const SCEV *LoopStart = AR->getStart();
    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
      return true;
  }
  return false;
}
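
// For illustration (hypothetical target where post-indexed loads are legal,
// e.g. AArch64-style): an address {%base,+,4}<%loop> with a constant step and
// a loop-invariant, non-constant start passes the checks above, matching
// loads of the shape
//
//   %v = load i32, ptr %p
//   %p.next = getelementptr i8, ptr %p, i64 4   ; candidate for post-inc
//
// which ISEL may combine into a single post-indexed access.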

/// Helper function for LSRInstance::GenerateReassociations.
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
                                             bool IsScaledReg) {
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  // Don't generate reassociations for the base register of a value that
  // may generate a post-increment operator. The reason is that the
  // reassociations cause extra base+register formulae to be created,
  // and possibly chosen, but the post-increment is more efficient.
  if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
    return;
  SmallVector<const SCEV *, 8> AddOps;
  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
  if (Remainder)
    AddOps.push_back(Remainder);

  if (AddOps.size() == 1)
    return;

  for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
                                                     JE = AddOps.end();
       J != JE; ++J) {
    // Loop-variant "unknown" values are uninteresting; we won't be able to
    // do anything meaningful with them.
    if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
      continue;

    // Don't pull a constant into a register if the constant could be folded
    // into an immediate field.
    if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                         LU.AccessTy, *J, Base.getNumRegs() > 1))
      continue;

    // Collect all operands except *J.
    SmallVector<const SCEV *, 8> InnerAddOps(
        ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
    InnerAddOps.append(std::next(J),
                       ((const SmallVector<const SCEV *, 8> &)AddOps).end());

    // Don't leave just a constant behind in a register if the constant could
    // be folded into an immediate field.
    if (InnerAddOps.size() == 1 &&
        isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                         LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
      continue;

    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
    if (InnerSum->isZero())
      continue;
    Formula F = Base;

    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
      continue;

    // Add the remaining pieces of the add back into the new formula.
    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
    if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                InnerSumSC->getValue()->getZExtValue())) {
      F.UnfoldedOffset =
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                              InnerSumSC->getValue()->getZExtValue());
      if (IsScaledReg) {
        F.ScaledReg = nullptr;
        F.Scale = 0;
      } else
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
    else
      F.BaseRegs[Idx] = InnerSum;

    // Add J as its own register, or an unfolded immediate.
    const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
    if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                SC->getValue()->getZExtValue()))
      F.UnfoldedOffset =
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                              SC->getValue()->getZExtValue());
    else
      F.BaseRegs.push_back(*J);
    // We may have changed the number of registers in the base regs; adjust the
    // formula accordingly.
    F.canonicalize(*L);

    if (InsertFormula(LU, LUIdx, F))
      // If that formula hadn't been seen before, recurse to find more like
      // it. Add a term of Log16(AddOps.size()) - the same as
      // Log2_32(AddOps.size()) >> 2 - because Depth alone is not enough to
      // bound compile time: every time AddOps.size() exceeds 16^x, we add x
      // to Depth.
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
                             Depth + 1 + (Log2_32(AddOps.size()) >> 2));
  }
}

/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base, unsigned Depth) {
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
                               /* Idx */ -1, /* IsScaledReg */ true);
}

/// Generate a formula consisting of all of the loop-dominating registers added
/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                       Formula Base) {
  // This method is only interesting on a plurality of registers.
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
          (Base.UnfoldedOffset.isNonZero()) <=
      1)
    return;

  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
  // processing the formula.
  Base.unscale();
  SmallVector<const SCEV *, 4> Ops;
  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
    if (SE.properlyDominates(BaseReg, L->getHeader()) &&
        !SE.hasComputableLoopEvolution(BaseReg, L)) {
      if (!CombinedIntegerType)
        CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
      Ops.push_back(BaseReg);
    }
    else
      NewBase.BaseRegs.push_back(BaseReg);
  }

  // If no register is relevant, we're done.
  if (Ops.size() == 0)
    return;

  // Utility function for generating the required variants of the combined
  // registers.
  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;

    // TODO: If Sum is zero, it probably means ScalarEvolution missed an
    // opportunity to fold something. For now, just ignore such cases
    // rather than proceed with zero in a register.
    if (Sum->isZero())
      return;

    F.BaseRegs.push_back(Sum);
    F.canonicalize(*L);
    (void)InsertFormula(LU, LUIdx, F);
  };

  // If we collected at least two registers, generate a formula combining them.
  if (Ops.size() > 1) {
    SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
    GenerateFormula(SE.getAddExpr(OpsCopy));
  }

  // If we have an unfolded offset, generate a formula combining it with the
  // registers collected.
  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
    Ops.push_back(SE.getConstant(CombinedIntegerType,
                                 NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
    GenerateFormula(SE.getAddExpr(Ops));
  }
}
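
// For illustration, a hypothetical use with the formula
//
//   reg(%inv1) + reg(%inv2) + reg({0,+,4}<%loop>)
//
// where %inv1 and %inv2 dominate the loop header: GenerateCombinations folds
// the two invariant registers into one preheader sum, yielding
//
//   reg(%inv1 + %inv2) + reg({0,+,4}<%loop>)
//
// which trades one add outside the loop for one fewer live register inside it.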

/// Helper function for LSRInstance::GenerateSymbolicOffsets.
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
                                              bool IsScaledReg) {
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  GlobalValue *GV = ExtractSymbol(G, SE);
  if (G->isZero() || !GV)
    return;
  Formula F = Base;
  F.BaseGV = GV;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;
  if (IsScaledReg)
    F.ScaledReg = G;
  else
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);
}

/// Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // We can't add a symbolic offset if the address already contains one.
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
                                /* IsScaledReg */ true);
}

/// Helper function for LSRInstance::GenerateConstantOffsets.
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {

  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    Formula F = Base;
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
      return;
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      // Add the offset to the base register.
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
      const SCEV *NewG = SE.getAddExpr(NewOffset, G);
      // If it cancelled out, drop the base register, otherwise update it.
      if (NewG->isZero()) {
        if (IsScaledReg) {
          F.Scale = 0;
          F.ScaledReg = nullptr;
        } else
          F.deleteBaseReg(F.BaseRegs[Idx]);
        F.canonicalize(*L);
      } else if (IsScaledReg)
        F.ScaledReg = NewG;
      else
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);
    }
  };

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  // With constant offsets and constant steps, we can generate pre-inc
  // accesses by having the offset equal the step. So, for access #0 with a
  // step of 8, we generate a G - 8 base which would require the first access
  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
  // for itself and hopefully becomes the base for other accesses. This means
  // that a single pre-indexed access can be generated to become the new base
  // pointer for each iteration of the loop, resulting in no extra add/sub
  // instructions for pointer updating.
  if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
    if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
      if (auto *StepRec =
              dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
        const APInt &StepInt = StepRec->getAPInt();
        int64_t Step = StepInt.isNegative() ?
          StepInt.getSExtValue() : StepInt.getZExtValue();

        for (Immediate Offset : Worklist) {
          if (Offset.isFixed()) {
            Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
            GenerateOffset(G, Offset);
          }
        }
      }
    }
  }
  for (Immediate Offset : Worklist)
    GenerateOffset(G, Offset);

  Immediate Imm = ExtractImmediate(G, SE);
  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
    return;
  Formula F = Base;
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;
  if (IsScaledReg) {
    F.ScaledReg = G;
  } else {
    F.BaseRegs[Idx] = G;
    // We may generate a non-canonical formula if G is a recurrence on the
    // current loop while F.ScaledReg is not.
    F.canonicalize(*L);
  }
  (void)InsertFormula(LU, LUIdx, F);
}

/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // TODO: For now, just add the min and max offset, because it usually isn't
  // worthwhile looking at everything in between.
  SmallVector<Immediate, 2> Worklist;
  Worklist.push_back(LU.MinOffset);
  if (LU.MaxOffset != LU.MinOffset)
    Worklist.push_back(LU.MaxOffset);

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
                                /* IsScaledReg */ true);
}

/// For ICmpZero, check to see if we can scale up the comparison. For example, x
/// == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
                                         Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;
  if (SE.getTypeSizeInBits(IntTy) > 64) return;

  // Don't do this if there is more than one offset.
  if (LU.MinOffset != LU.MaxOffset) return;

  // Check if the transformation is valid. It is illegal to multiply a pointer.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
      return;
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    // Check that Factor can be represented by IntTy
    if (!ConstantInt::isValueValidForType(IntTy, Factor))
      continue;
    // Check that the multiplication doesn't overflow.
    if (Base.BaseOffset.isMin() && Factor == -1)
      continue;
    // Not supporting scalable immediates.
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
      continue;
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))
      continue;

    // Check that multiplying with the use offset doesn't overflow.
    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
      continue;
    Offset = Offset.mulUnsigned(Factor);
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))
      continue;

    Formula F = Base;
    F.BaseOffset = NewBaseOffset;

    // Check that this scale is legal.
    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
      continue;

    // Compensate for the use having MinOffset built into it.
    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    // Check that multiplying with each base register doesn't overflow.
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // Check that multiplying with the unfolded offset doesn't overflow.
    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
        continue;
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
        continue;
      // If the offset will be truncated, check that it is in bounds.
      if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
                                       IntTy, F.UnfoldedOffset.getFixedValue()))
        continue;
    }

    // If we make it here and it's legal, add it.
    (void)InsertFormula(LU, LUIdx, F);
  next:;
  }
}
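
// A worked example of the scaling above (hypothetical use): the ICmpZero
// expression (%n - {0,+,1}<%loop>) with an interesting factor of 4 becomes
//
//   (4 * %n - {0,+,4}<%loop>) == 0
//
// which holds exactly when the original comparison does (absent overflow),
// but lets the IV stride by 4 and so share a register with accesses that
// step by 4 bytes. The getExactSDiv checks undo the multiplication to verify
// that nothing overflowed.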

/// Generate stride factor reuse formulae by making use of scaled-offset address
/// modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;

  // If this Formula already has a scaled register, we can't add another one.
  // Try to unscale the formula to generate a better scale.
  if (Base.Scale != 0 && !Base.unscale())
    return;

  assert(Base.Scale == 0 && "unscale did not do its job!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                    Base)) {
      // As a special case, handle out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For an ICmpZero, negating a solitary base register won't lead to
    // new solutions.
    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)
      continue;
    // For each addrec base reg, if its loop is current loop, apply the scale.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (FactorS->isZero())
          continue;
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            // TODO: This could be optimized to avoid all the copying.
            Formula F = Base;
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);
            // The canonical representation of 1*reg is reg, which is already in
            // Base. In that case, do not try to insert the formula, it will be
            // rejected anyway.
            if (F.Scale == 1 &&
                (F.BaseRegs.empty() ||
                 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
              continue;
            // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
            // a non-canonical formula with ScaledReg's loop not being L.
            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
              F.canonicalize(*L);
            (void)InsertFormula(LU, LUIdx, F);
          }
      }
    }
  }
}

/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
/// perform the extension/truncate and normalize again, as the normalized form
/// can result in folds that are not valid in the post-inc use contexts. The
/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
static const SCEV *
getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
                                   const SCEV *Expr, Type *ToTy,
                                   ScalarEvolution &SE) {
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE);
    const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
    const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE);
    if (!New || (Result && New != Result))
      return nullptr;
    Result = New;
  }

  assert(Result && "failed to create expression");
  return Result;
}

/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Don't bother truncating symbolic values.
  if (Base.BaseGV) return;

  // Determine the integer type for the base formula.
  Type *DstTy = Base.getType();
  if (!DstTy) return;
  if (DstTy->isPointerTy())
    return;

  // It is invalid to extend a pointer type so exit early if ScaledReg or
  // any of the BaseRegs are pointers.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  if (any_of(Base.BaseRegs,
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))
    return;

  SmallVector<PostIncLoopSet> Loops;
  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
    if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
      Formula F = Base;

      // Sometimes SCEV is able to prove zero during the ext transform. This may
      // happen if SCEV did not do all possible transforms while creating the
      // initial node (maybe due to depth limitations), but it can do them while
      // taking the ext.
      if (F.ScaledReg) {
        const SCEV *NewScaledReg =
            getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE);
        if (!NewScaledReg || NewScaledReg->isZero())
          continue;
        F.ScaledReg = NewScaledReg;
      }
      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
            getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE);
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;
          break;
        }
        BaseReg = NewBaseReg;
      }
      if (HasZeroBaseReg)
        continue;

      // TODO: This assumes we've done basic processing on all uses and
      // have an idea what the register usage is.
      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
        continue;

      F.canonicalize(*L);
      (void)InsertFormula(LU, LUIdx, F);
    }
  }
}
4478
4479namespace{
4480
4481/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
4482/// modifications so that the search phase doesn't have to worry about the data
4483/// structures moving underneath it.
4484structWorkItem {
4485size_t LUIdx;
4486 ImmediateImm;
4487constSCEV *OrigReg;
4488
4489WorkItem(size_t LI, ImmediateI,constSCEV *R)
4490 : LUIdx(LI),Imm(I), OrigReg(R) {}
4491
4492voidprint(raw_ostream &OS)const;
4493voiddump()const;
4494};
4495
4496}// end anonymous namespace
4497
4498#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4499void WorkItem::print(raw_ostream &OS) const{
4500OS <<"in formulae referencing " << *OrigReg <<" in use " << LUIdx
4501 <<" , add offset " <<Imm;
4502}
4503
4504LLVM_DUMP_METHODvoid WorkItem::dump() const{
4505print(errs());errs() <<'\n';
4506}
4507#endif
4508
4509/// Look for registers which are a constant distance apart and try to form reuse
4510/// opportunities between them.
4511void LSRInstance::GenerateCrossUseConstantOffsets() {
4512// Group the registers by their value without any added constant offset.
4513usingImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4514
4515DenseMap<const SCEV *, ImmMapTy>Map;
4516DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4517SmallVector<const SCEV *, 8>Sequence;
4518for (constSCEV *Use : RegUses) {
4519constSCEV *Reg =Use;// Make a copy for ExtractImmediate to modify.
4520 ImmediateImm =ExtractImmediate(Reg, SE);
4521auto Pair =Map.insert(std::make_pair(Reg, ImmMapTy()));
4522if (Pair.second)
4523Sequence.push_back(Reg);
4524 Pair.first->second.insert(std::make_pair(Imm,Use));
4525 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
4526 }
4527
  // Now examine each set of registers with the same base value. Build up
  // a list of work to do and do the work in a separate step so that we're
  // not adding formulae and register counts while we're searching.
  SmallVector<WorkItem, 32> WorkItems;
  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
      UniqueItems;
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    // It's not worthwhile looking for reuse if there's only one offset.
    if (Imms.size() == 1)
      continue;

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry : Imms)
                 dbgs() << ' ' << Entry.first;
               dbgs() << '\n');

    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                          << '\n');
        continue;
      }

      // Conservatively examine offsets between this orig reg and a few
      // selected other orig regs.
      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                          << "\n");
        continue;
      }
      // Only scalable if both terms are scalable, or if one is scalable and
      // the other is 0.
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      // Compute (First + Last) / 2 without overflow using the fact that
      // First + Last = 2 * (First & Last) + (First ^ Last).
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      // If the result is negative and FI is odd and LI even (or vice versa),
      // we rounded towards -inf. Add 1 in that case, to round towards 0.
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
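      // Worked example: FI = 7, LI = 2 gives (7 & 2) + ((7 ^ 2) >> 1)
      // = 2 + (5 >> 1) = 4 = floor((7 + 2) / 2), with no correction since
      // Avg is non-negative. FI = -3, LI = 2 gives Avg = 0 + (-1) = -1,
      // and the correction adds ((-3 ^ 2) & 1) = 1, yielding 0, which is
      // (-3 + 2) / 2 rounded towards zero.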
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;
        if (!JImm.isCompatibleImmediate(M->first))
          continue;

        // Compute the difference between the two.
        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          // Make a memo of this use, offset, and register tuple.
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
      }
    }
  }

  Map.clear();
  Sequence.clear();
  UsedByIndicesMap.clear();
  UniqueItems.clear();

  // Now iterate through the worklist and add new formulae.
  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);

    // TODO: Use a more targeted data structure.
    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      // FIXME: The code for the scaled and unscaled registers looks
      // very similar but slightly different. Investigate if they
      // could be merged. That way, we would not have to unscale the
      // Formula.
      F.unscale();
      // Use the immediate in the scaled register.
      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
          continue;
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        // Don't create 50 + reg(-50).
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
          continue;
        Formula NewF = F;
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        NewF))
          continue;
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        // If the new scale is a constant in a register, and adding the
        // constant value to the immediate would produce a value closer to
        // zero than the immediate itself, then the formula isn't worthwhile.
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
          // FIXME: Do we need to do something for scalable immediates here?
          // A scalable SCEV won't be constant, but we might still have
          // something in the offset? Bail out for now to be safe.
          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
            continue;
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))
            continue;
        }

        // OK, looks good.
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
      } else {
        // Use the immediate in a base register.
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
            continue;
          Formula NewF = F;
          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
            continue;
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                          LU.Kind, LU.AccessTy, NewF)) {
            if (AMK == TTI::AMK_PostIndexed &&
                mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
              continue;
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
              continue;
            NewF = F;
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          }
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          // If the new formula has a constant in a register, and adding the
          // constant value to the immediate would produce a value closer to
          // zero than the immediate itself, then the formula isn't worthwhile.
          for (const SCEV *NewReg : NewF.BaseRegs)
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
              if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
                goto skip_formula;
              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      .abs()
                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                          .countr_zero() >=
                      (unsigned)llvm::countr_zero<uint64_t>(
                          NewF.BaseOffset.getFixedValue()))
                goto skip_formula;
            }

          // Ok, looks good.
          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
          break;
        skip_formula:;
        }
      }
    }
  }
}

/// Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
  // queries are more precise.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

  LLVM_DEBUG(dbgs() << "\n"
                       "After generating reuse formulae:\n";
             print_uses(dbgs()));
}

/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif

  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
  using BestFormulaeTy =
      DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      // Some formulas are instant losers. For example, they may depend on
      // nonexistent AddRecs from other loops. These need to be filtered
      // immediately, otherwise heuristics could choose them over others
      // leading to an unsatisfactory solution. Passing LoserRegs into
      // RateFormula here avoids the need to recompute this information across
      // formulae using the same bad AddRec. Passing LoserRegs is also
      // essential unless we remove the corresponding bad register from the
      // Regs set.
      Cost CostF(L, SE, TTI, AMK);
      Regs.clear();
      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {
        // During initial formula generation, undesirable formulae are
        // generated by uses within other loops that have some non-trivial
        // address mode or use the postinc form of the IV. LSR needs to provide
        // these formulae as the basis of rediscovering the desired formula
        // that uses an AddRec corresponding to the existing phi. Once all
        // formulae have been generated, these initial losers may be pruned.
        LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
                   dbgs() << "\n");
      }
      else {
        SmallVector<const SCEV *, 4> Key;
        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
        llvm::sort(Key);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));
        if (P.second)
          continue;

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        Regs.clear();
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
        if (CostF.isLess(CostBest))
          std::swap(F, Best);
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n"
                             "    in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
      }
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }

    // Now that we've filtered out some formulae, recompute the Regs set.
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}

/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes
/// the search space, but the pruning isn't always sufficient.
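/// For example, three uses with 3, 4, and 5 formulae each give a worst case
/// of 3 * 4 * 5 = 60 candidate solutions; the running product below stops
/// growing once it reaches ComplexityLimit. (Worked example for
/// illustration.)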
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  size_t Power = 1;
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    if (FSize >= ComplexityLimit) {
      Power = ComplexityLimit;
      break;
    }
    Power *= FSize;
    if (Power >= ComplexityLimit)
      break;
  }
  return Power;
}

/// When one formula uses a superset of the registers of another formula, it
/// won't help reduce register pressure (though it may not necessarily hurt
/// register pressure); remove it to simplify the system.
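/// Illustrative sketch (not code from below): a formula reg(a) + reg(5) uses
/// a strict superset of the registers of reg(a) + 5, where the 5 is folded
/// into the immediate field; whenever the use also has the latter form, the
/// register-hungry former can be deleted.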
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "
                         "formulae.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
          continue;
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
        for (SmallVectorImpl<const SCEV *>::const_iterator
               I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            Formula NewF = F;
            // FIXME: Formulas should store bitwidth to do wrapping properly.
            // See PR41034.
            NewF.BaseOffset =
                Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                    (uint64_t)C->getValue()->getSExtValue());
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                         dbgs() << '\n');
              LU.DeleteFormula(F);
              --i;
              --e;
              Any = true;
              break;
            }
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              if (!F.BaseGV) {
                Formula NewF = F;
                NewF.BaseGV = GV;
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                             dbgs() << '\n');
                  LU.DeleteFormula(F);
                  --i;
                  --e;
                  Any = true;
                  break;
                }
              }
          }
        }
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}

/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");

  // This is especially useful for unrolled loops.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
        continue;

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!LUThatHas)
        continue;

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/false,
                              LU.Kind, LU.AccessTy))
        continue;

      LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      // Transfer the fixups of LU to LUThatHas.
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
      }

      // Delete formulae from the new use which are no longer legal.
      bool Any = false;
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
          LUThatHas->DeleteFormula(F);
          --i;
          --e;
          Any = true;
        }
      }

      if (Any)
        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      // Delete the old use.
      DeleteUse(LU, LUIdx);
      --LUIdx;
      --NumUses;
      break;
    }
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}

/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                         "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}

/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
/// pick the best one and delete the others.
/// This narrowing heuristic tries to keep as many formulae with different
/// Scale and ScaledReg pairs as possible while narrowing the search space.
/// The benefit is that it is more likely to find a better solution from a
/// formulae set with more Scale and ScaledReg variations than from a set
/// where they are all the same. The winner-reg-picking heuristic will often
/// keep the formulae with the same Scale and ScaledReg and filter out the
/// others, and we want to avoid that if possible.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;

  BestFormulaeTy BestFormulae;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // First we will try to choose the Formula with fewer new registers.
      // For a register used by current Formula, the more the register is
      // shared among LSRUses, the less we increase the register number
      // counter of the formula.
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // If the new register numbers are the same, choose the Formula with
      // less Cost.
      Cost CostFA(L, SE, TTI, AMK);
      Cost CostFB(L, SE, TTI, AMK);
      Regs.clear();
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      Regs.clear();
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);
    };

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (!F.ScaledReg)
        continue;
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (P.second)
        continue;

      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
        std::swap(F, Best);
      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                 dbgs() << "\n"
                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}

/// If we are over the complexity limit, filter the formulae of any
/// post-inc-preferring uses down to those with the lowest register count.
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  if (AMK != TTI::AMK_PostIndexed)
    return;
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)
      continue;
    if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
        !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
      continue;

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n");
        LU.DeleteFormula(F);
        --FIdx;
        --NumForms;
        Any = true;
      }
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    if (EstimateSearchSpaceComplexity() < ComplexityLimit)
      break;
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}

/// This function deletes formulas with a high expected number of registers.
/// Assuming we don't know the value of each formula (we have already deleted
/// all the inefficient ones), compute for each register the probability of it
/// not being selected.
/// For example,
/// Use1:
///  reg(a) + reg({0,+,1})
///  reg(a) + reg({-1,+,1}) + 1
///  reg({a,+,1})
/// Use2:
///  reg(b) + reg({0,+,1})
///  reg(b) + reg({-1,+,1}) + 1
///  reg({b,+,1})
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1})
///  reg(c) + reg({b,+,1})
///
/// Probability of not selecting
///                Use1   Use2    Use3
/// reg(a)        (1/3) *   1   *   1
/// reg(b)          1   * (1/3) * (1/2)
/// reg({0,+,1})  (2/3) * (2/3) * (1/2)
/// reg({-1,+,1}) (2/3) * (2/3) *   1
/// reg({a,+,1})  (2/3) *   1   *   1
/// reg({b,+,1})    1   * (2/3) * (2/3)
/// reg(c)          1   *   1   *   0
///
/// Now compute the mathematical expectation of the register count for each
/// formula. Note that for each use we exclude the probability of not
/// selecting for that use. For example, for Use1 the probability for reg(a)
/// would be just 1 * 1 (excluding the probability 1/3 of not selecting for
/// Use1).
/// Use1:
///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
///  reg(a) + reg({-1,+,1}) + 1     1 + 4/9       -- to be deleted
///  reg({a,+,1})                   1
/// Use2:
///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
///  reg({b,+,1})                   2/3
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
///  reg(c) + reg({b,+,1})          1 + 2/3
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;
  // Ok, we have too many formulae on our hands to conveniently handle.
  // Use a rough heuristic to thin out the list.

  // Set of Regs which will be 100% used in the final solution, i.e. used in
  // each formula of a solution (in the example above this is reg(c)).
  // We can skip them in calculations.
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

  // Map each register to its probability of not being selected.
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      if (P != 0.0)
        PNotSel *= P;
      else
        UniqRegs.insert(Reg);
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // Delete formulas where the expected number of registers is high.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // If there is nothing to delete - continue.
    if (LU.Formulae.size() < 2)
      continue;
    // This is a temporary solution to test performance. Float should be
    // replaced with a rounding-independent type (based on integers) to avoid
    // different results for different target builds.
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    size_t MinIdx = 0;
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
                 dbgs() << '\n');
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // When we choose the formula, the regs become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
  }
  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}

// Check if Best and Reg are SCEVs separated by a constant amount C, and if so
// whether the addressing offset +C would be legal where the negative offset
// -C is not.
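// Illustrative example: with Best = {x+4,+,1} and Reg = {x,+,1} the constant
// difference is +4. If the target can fold [base + 4] into its addressing
// mode but not [base + -4], returning true here lets the caller switch to
// Reg as the base, so the other register remains reachable as Reg plus a
// legal positive offset.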
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
                                       ScalarEvolution &SE, const SCEV *Best,
                                       const SCEV *Reg,
                                       MemAccessTy AccessType) {
  if (Best->getType() != Reg->getType() ||
      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
       cast<SCEVAddRecExpr>(Best)->getLoop() !=
           cast<SCEVAddRecExpr>(Reg)->getLoop()))
    return false;
  std::optional<APInt> Diff = SE.computeConstantDifference(Best, Reg);
  if (!Diff)
    return false;

  return TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/Diff->getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
         !TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/-Diff->getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
}

/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
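/// Hypothetical illustration: if {a,+,1} is referenced by five of seven uses
/// and no other register by more, assume {a,+,1} will be part of the final
/// solution and, in those five uses, delete every formula that does not
/// reference it.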
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // With all other options exhausted, loop until the system is simple
  // enough to handle.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // Ok, we have too many formulae on our hands to conveniently handle.
    // Use a rough heuristic to thin out the list.
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    // Pick the register which is used by the most LSRUses, which is likely
    // to be a good reuse register candidate.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }

        // If the scores are the same, but the Reg is simpler for the target
        // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
        // handle +C but not -C), opt for the simpler formula.
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
                                         Uses[LUIdx].AccessTy)) {
            Best = Reg;
            BestNum = Count;
          }
        }
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // In any use with formulae which reference this register, delete formulae
    // which don't reference it.
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
          LU.DeleteFormula(F);
          --e;
          --i;
          Any = true;
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
          continue;
        }
      }

      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}

/// If there are an extraordinary number of formulae to choose from, use some
/// rough heuristics to prune down the number of formulae. This keeps the main
/// solver from taking an extraordinary amount of time in some worst-case
/// scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}

/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  // Some ideas:
  //  - prune more:
  //    - use more aggressive filtering
  //    - sort the formulae so that the most profitable solutions are found
  //      first
  //    - sort the uses too
  //  - search faster:
  //    - don't compute a cost, and then compare. compare while computing a
  //      cost and bail early.
  //    - track register sets with SmallBitVector

  const LSRUse &LU = Uses[Workspace.size()];

  // If this use references any register that's already a part of the
  // in-progress solution, consider it a requirement that a formula must
  // reference that register in order to be considered. This prunes out
  // unprofitable searching.
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    // This can sometimes (notably when trying to favour postinc) lead to
    // sub-optimal decisions. In such cases it is best left to the cost
    // modelling to get right.
    if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
      for (const SCEV *Reg : ReqRegs) {
        if ((F.ScaledReg && F.ScaledReg == Reg) ||
            is_contained(F.BaseRegs, Reg)) {
          --NumReqRegsToFind;
          if (NumReqRegsToFind == 0)
            break;
        }
      }
      if (NumReqRegsToFind != 0) {
        // If none of the formulae satisfied the required registers, then we
        // could clear ReqRegs and try again. Currently, we simply give up in
        // this case.
        continue;
      }
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
    if (NewCost.isLess(SolutionCost)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs)
                     dbgs() << "- " << *S << "\n";
                   dbgs() << '\n');

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}

/// Choose one formula from each use. Return the results in the given Solution
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << "  ";
               Uses[i].print(dbgs());
               dbgs() << "\n"
                         "    ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });

  assert(Solution.size() == Uses.size() && "Malformed solution!");

  const bool EnableDropUnprofitableSolution = [&] {
    switch (AllowDropSolutionIfLessProfitable) {
    case cl::BOU_TRUE:
      return true;
    case cl::BOU_FALSE:
      return false;
    case cl::BOU_UNSET:
      return TTI.shouldDropLSRSolutionIfLessProfitable();
    }
    llvm_unreachable("Unhandled cl::boolOrDefault enum");
  }();

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      LLVM_DEBUG(
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
    else {
      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
                           "solution, dropping LSR solution.\n";);
      Solution.clear();
    }
  }
}

/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
/// far as we can go while still being dominated by the input positions. This
/// helps canonicalize the insert position, which encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                 const SmallVectorImpl<Instruction *> &Inputs)
    const {
  Instruction *Tentative = &*IP;
  while (true) {
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // Don't bother attempting to insert before a catchswitch; its basic block
    // cannot have other non-PHI instructions.
    if (isa<CatchSwitchInst>(Tentative))
      return IP;

    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // Attempt to find an insert position in the middle of the block,
      // instead of at the end, so that it can be used for other expansions.
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos->getIterator();
    else
      IP = Tentative->getIterator();

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();

      // Don't climb into a loop though.
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }

    Tentative = IDom->getTerminator();
  }

  return IP;
}

/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
  // Collect some instructions which must be dominated by the
  // expanding replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
  }
  // The expansion must also be dominated by the increment positions of any
  // loops for which it is using post-inc mode.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;

    // Be dominated by the loop exit.
    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);

  // Don't insert instructions before PHI nodes.
  while (isa<PHINode>(IP)) ++IP;

  // Ignore landingpad instructions.
  while (IP->isEHPad()) ++IP;

  // Ignore debug intrinsics.
  while (isa<DbgInfoIntrinsic>(IP)) ++IP;

  // Set IP below instructions recently inserted by SCEVExpander. This keeps
  // the IP consistent across expansions and allows the previously inserted
  // instructions to be reused by subsequent expansion.
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;

  return IP;
}

/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding").
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Determine an input position which will be dominated by the operands and
  // which will dominate the result.
  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  Rewriter.setInsertPoint(&*IP);

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(LF.PostIncLoops);

  // This is the type that the user actually needs.
  Type *OpTy = LF.OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user, make the post-inc adjustment.
    Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user, make the post-inc adjustment.
    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
    ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);

    if (LU.Kind == LSRUse::ICmpZero) {
      // Expand ScaledReg as if it was part of the base regs.
      if (F.Scale == 1)
        Ops.push_back(
            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
      else {
        // An interesting way of "folding" with an icmp is to use a negated
        // scale, which we'll implement by inserting it into the other operand
        // of the icmp.
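        // Illustrative example: a use representing
        //   icmp eq (X + -1*Y), 0
        // can be expanded with X as one operand and Y as the other, i.e.
        //   icmp eq X, Y
        // with no explicit negation or addition emitted.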
        assert(F.Scale == -1 &&
               "The only scale supported by ICmpZero uses is -1!");
        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
      }
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.

      // Flush the operand list to suppress SCEVExpander hoisting address
      // modes, unless the addressing mode will not be folded.
      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
          isAMCompletelyFolded(TTI, LU, F)) {
        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
        Ops.clear();
        Ops.push_back(SE.getUnknown(FullV));
      }
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
      if (F.Scale != 1)
        ScaledS =
            SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the GV portion.
  if (F.BaseGV) {
    // Flush the operand list to suppress SCEVExpander hoisting.
    if (!Ops.empty()) {
      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);
      Ops.clear();
      Ops.push_back(SE.getUnknown(FullV));
    }
    Ops.push_back(SE.getUnknown(F.BaseGV));
  }

  // Flush the operand list to suppress SCEVExpander hoisting of both folded
  // and unfolded offsets. LSR assumes they both live next to their uses.
  if (!Ops.empty()) {
    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
    Ops.clear();
    Ops.push_back(SE.getUnknown(FullV));
  }

  // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
  // out at this point, or should we generate a SCEV adding together mixed
  // offsets?
  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");
  // Expand the immediate portion.
  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
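      // Illustrative example: a use representing
      //   icmp eq (X + 7), 0
      // is expanded as
      //   icmp eq X, -7
      // folding the offset into the comparison's other operand.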
      if (!ICmpScaledV)
        ICmpScaledV =
            ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
    }
  }

  // Expand the unfolded offset portion.
  Immediate UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset.isNonZero()) {
    // Just add the immediate values.
    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
  }

  // Emit instructions summing all the operands.
  const SCEV *FullS = Ops.empty() ?
                      SE.getConstant(IntTy, 0) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);

  // We're done expanding now, so reset the rewriter.
  Rewriter.clearPostInc();

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
      DeadInsts.emplace_back(OperandIsInstr);
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast = CastInst::Create(
            CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
            ICmpScaledV, OpTy, "tmp", CI->getIterator());
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      // A scale of 1 means that the scale has been expanded as part of the
      // base regs.
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)Offset.getFixedValue());
      if (C->getType() != OpTy) {
        C = ConstantFoldCastOperand(
            CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
            CI->getDataLayout());
        assert(C && "Cast of ConstantInt should have folded");
      }

      CI->setOperand(1, C);
    }
  }

  return FullV;
}

/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
                                const LSRFixup &LF, const Formula &F,
                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  DenseMap<BasicBlock *, Value *> Inserted;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
      bool needUpdateFixups = false;
      BasicBlock *BB = PN->getIncomingBlock(i);

      // If this is a critical edge, split the edge so that we do not insert
      // the code on all predecessor/successor paths. We do this unless this
      // is the canonical backedge for this loop, which complicates post-inc
      // users.
      if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
          !isa<IndirectBrInst>(BB->getTerminator()) &&
          !isa<CatchSwitchInst>(BB->getTerminator())) {
        BasicBlock *Parent = PN->getParent();
        Loop *PNLoop = LI.getLoopFor(Parent);
        if (!PNLoop || Parent != PNLoop->getHeader()) {
          // Split the critical edge.
          BasicBlock *NewBB = nullptr;
          if (!Parent->isLandingPad()) {
            NewBB =
                SplitCriticalEdge(BB, Parent,
                                  CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
                                      .setMergeIdenticalEdges()
                                      .setKeepOneInputPHIs());
          } else {
            SmallVector<BasicBlock*, 2> NewBBs;
            DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
            SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI);
            NewBB = NewBBs[0];
          }
          // If NewBB==NULL, then SplitCriticalEdge refused to split because
          // all phi predecessors are identical. The simple thing to do is
          // skip splitting in this case rather than complicate the API.
          if (NewBB) {
            // If PN is outside of the loop and BB is in the loop, we want to
            // move the block to be immediately before the PHI block, not
            // immediately after BB.
            if (L->contains(BB) && !L->contains(PN))
              NewBB->moveBefore(PN->getParent());

            // Splitting the edge can reduce the number of PHI entries we have.
            e = PN->getNumIncomingValues();
            BB = NewBB;
            i = PN->getBasicBlockIndex(BB);

            needUpdateFixups = true;
          }
        }
      }

      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
      if (!Pair.second)
        PN->setIncomingValue(i, Pair.first->second);
      else {
        Value *FullV =
            Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);

        // If this is reuse-by-noop-cast, insert the noop cast.
        Type *OpTy = LF.OperandValToReplace->getType();
        if (FullV->getType() != OpTy)
          FullV = CastInst::Create(
              CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
              LF.OperandValToReplace->getType(), "tmp",
              BB->getTerminator()->getIterator());

        // If the incoming block for this value is not in the loop, it means
        // the current PHI is not in a loop exit, so we must create a LCSSA
        // PHI for the inserted value.
        if (auto *I = dyn_cast<Instruction>(FullV))
          if (L->contains(I) && !L->contains(BB))
            InsertedNonLCSSAInsts.insert(I);

        PN->setIncomingValue(i, FullV);
        Pair.first->second = FullV;
      }

      // If LSR splits a critical edge and the phi node has other pending
      // fixup operands, we need to update those pending fixups. Otherwise
      // formulae will not be implemented completely and some instructions
      // will not be eliminated.
      if (needUpdateFixups) {
        for (LSRUse &LU : Uses)
          for (LSRFixup &Fixup : LU.Fixups)
            // If a fixup is supposed to rewrite some operand in the phi
            // that was just updated, it may already have been moved to
            // another phi node. Such a fixup requires an update.
            if (Fixup.UserInst == PN) {
              // Check if the operand we try to replace still exists in the
              // original phi.
              bool foundInOriginalPHI = false;
              for (const auto &val : PN->incoming_values())
                if (val == Fixup.OperandValToReplace) {
                  foundInOriginalPHI = true;
                  break;
                }

              // If the fixup operand was found in the original PHI, there is
              // nothing to do.
              if (foundInOriginalPHI)
                continue;

              // Otherwise it might have been moved to another PHI and needs
              // an update. If the fixup operand is not found in any of the
              // incoming blocks, that means we have already rewritten it -
              // nothing to do.
              for (const auto &Block : PN->blocks())
                for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
                     ++I) {
                  PHINode *NewPN = cast<PHINode>(I);
                  for (const auto &val : NewPN->incoming_values())
                    if (val == Fixup.OperandValToReplace)
                      Fixup.UserInst = NewPN;
                }
            }
      }
    }
}

/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding"), and update the UserInst to reference the
/// newly expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
  } else {
    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                           FullV, OpTy, "tmp", LF.UserInst->getIterator());
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here (for now) because
    // Expand may have updated one of the operands of the icmp already, and
    // its new value may happen to be equal to LF.OperandValToReplace, in
    // which case doing replaceUsesOfWith leads to replacing both operands
    // with the same value. TODO: Reorganize this.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
    DeadInsts.emplace_back(OperandIsInstr);
}

// Try to hoist the IVInc to the loop header if all IVInc users are in
// the loop header. This helps the backend generate post-indexed loads and
// stores when the latch block is different from the loop header block.
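// Illustrative example (assuming an AArch64-like target with post-indexed
// addressing): a header load followed by the IV increment can then be
// selected as a single post-indexed load such as
//   ldr w0, [x1], #4
// which loads through x1 and advances it by 4 in one instruction.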
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
                          const LSRUse &LU, Instruction *IVIncInsertPos,
                          Loop *L) {
  if (LU.Kind != LSRUse::Address)
    return false;

  // For now this code does the conservative optimization and only works for
  // the header block. Later we can hoist the IVInc to a block that
  // post-dominates all the users.
  BasicBlock *LHeader = L->getHeader();
  if (IVIncInsertPos->getParent() == LHeader)
    return false;

  if (!Fixup.OperandValToReplace ||
      any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
        Instruction *UI = cast<Instruction>(U);
        return UI->getParent() != LHeader;
      }))
    return false;

  Instruction *I = Fixup.UserInst;
  Type *Ty = I->getType();
  return Ty->isIntegerTy() &&
         ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
          (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
}

6058/// Rewrite all the fixup locations with new values, following the chosen
6059/// solution.
6060void LSRInstance::ImplementSolution(
6061constSmallVectorImpl<const Formula *> &Solution) {
6062// Keep track of instructions we may have made dead, so that
6063// we can remove them after we are done working.
6064SmallVector<WeakTrackingVH, 16> DeadInsts;
6065
6066// Mark phi nodes that terminate chains so the expander tries to reuse them.
6067for (const IVChain &Chain : IVChainVec) {
6068if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6069Rewriter.setChainedPhi(PN);
6070 }
6071
6072// Expand the new value definitions and update the users.
6073for (size_t LUIdx = 0, NumUses =Uses.size(); LUIdx != NumUses; ++LUIdx)
6074for (const LSRFixup &Fixup :Uses[LUIdx].Fixups) {
6075Instruction *InsertPos =
6076canHoistIVInc(TTI,Fixup,Uses[LUIdx], IVIncInsertPos, L)
6077 ?L->getHeader()->getTerminator()
6078 : IVIncInsertPos;
6079Rewriter.setIVIncInsertPos(L, InsertPos);
6080 Rewrite(Uses[LUIdx],Fixup, *Solution[LUIdx], DeadInsts);
6081 Changed =true;
6082 }
6083
6084auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6085formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);
6086
6087for (const IVChain &Chain : IVChainVec) {
6088 GenerateIVChain(Chain, DeadInsts);
6089 Changed =true;
6090 }
6091
6092for (constWeakVH &IV :Rewriter.getInsertedIVs())
6093if (IV && dyn_cast<Instruction>(&*IV)->getParent())
6094 ScalarEvolutionIVs.push_back(IV);
6095
6096// Clean up after ourselves. This must be done before deleting any
6097// instructions.
6098Rewriter.clear();
6099
6100 Changed |=RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
6101 &TLI, MSSAU);
6102
6103// In our cost analysis above, we assume that each addrec consumes exactly
6104// one register, and arrange to have increments inserted just before the
6105// latch to maximimize the chance this is true. However, if we reused
6106// existing IVs, we now need to move the increments to match our
6107// expectations. Otherwise, our cost modeling results in us having a
6108// chosen a non-optimal result for the actual schedule. (And yes, this
6109// scheduling decision does impact later codegen.)
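  // For example, if we reused a pre-existing increment that sits in the
  // header while the model assumed it would be emitted just before the latch
  // terminator, the loop below moves it down so the addrec really does
  // occupy a single register for the whole iteration.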
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    Value *Start = nullptr, *Step = nullptr;
    if (!matchSimpleRecurrence(&PN, BO, Start, Step))
      continue;

    switch (BO->getOpcode()) {
    case Instruction::Sub:
      if (BO->getOperand(0) != &PN)
        // sub is non-commutative - match handling elsewhere in LSR
        continue;
      break;
    case Instruction::Add:
      break;
    default:
      continue;
    };

    if (!isa<Constant>(Step))
      // If not a constant step, might increase register pressure
      // (We assume constants have been canonicalized to RHS)
      continue;

    if (BO->getParent() == IVIncInsertPos->getParent())
      // Only bother moving across blocks. Isel can handle block local case.
      continue;

    // Can we legally schedule inc at the desired point?
    if (!llvm::all_of(BO->uses(),
                      [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
      continue;
    BO->moveBefore(IVIncInsertPos->getIterator());
    Changed = true;
  }
}

LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
                            ? PreferredAddresingMode
                            : TTI.getPreferredAddressingMode(L, &SE)),
      Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),
      BaselineCost(L, SE, TTI, AMK) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHIIt()))
            return;
    }
  }

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // Configure SCEVExpander already now, so the correct mode is used for
  // isSafeToExpand() checks.
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip nested loops until we can model them better with formulae.
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  // If the number of registers is not the major cost, we cannot benefit from
  // the current profitable chain optimization, which is based on the number
  // of registers.
  // FIXME: add profitable chain optimization for other kinds of major cost,
  // for example the number of instructions.
  if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
    CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));
  LLVM_DEBUG(dbgs() << "The baseline solution requires ";
             BaselineCost.print(dbgs()); dbgs() << "\n");

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) && "Illegal formula generated!");
  };
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      dbgs() << "  ";
      LF.print(OS);
      OS << '\n';
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    dbgs() << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}

LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid

  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);

  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  // Requiring LoopSimplify a second time here prevents IVUsers from running
  // twice, since LoopSimplify was invalidated by running ScalarEvolution.
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<IVUsersWrapperPass>();
  AU.addPreserved<IVUsersWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addPreserved<MemorySSAWrapperPass>();
}

namespace {

/// Enables more convenient iteration over a DWARF expression vector.
static iterator_range<llvm::DIExpression::expr_op_iterator>
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  llvm::DIExpression::expr_op_iterator Begin =
      llvm::DIExpression::expr_op_iterator(Expr.begin());
  llvm::DIExpression::expr_op_iterator End =
      llvm::DIExpression::expr_op_iterator(Expr.end());
  return {Begin, End};
}

struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    Expr = Base.Expr;
  }

  void clear() {
    LocationOps.clear();
    Expr.clear();
  }

  /// The DIExpression as we translate the SCEV.
  SmallVector<uint64_t, 6> Expr;
  /// The location ops of the DIExpression.
  SmallVector<Value *, 2> LocationOps;

  void pushOperator(uint64_t Op) { Expr.push_back(Op); }
  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the
  /// value in the set of values referenced by the expression.
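  /// For example (illustrative), pushing %a, then %b, then %a again emits
  /// DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_LLVM_arg 0 and leaves
  /// LocationOps == {%a, %b}.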
  void pushLocation(llvm::Value *V) {
    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
    auto *It = llvm::find(LocationOps, V);
    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
    } else {
      ArgIndex = LocationOps.size();
      LocationOps.push_back(V);
    }
    Expr.push_back(ArgIndex);
  }

  void pushValue(const SCEVUnknown *U) {
    llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
    pushLocation(V);
  }

  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getSignificantBits() > 64)
      return false;
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());
    return true;
  }

  // Iterating the expression as DWARF ops is convenient when updating
  // DWARF_OP_LLVM_args.
  iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
    return ToDwarfOpIter(Expr);
  }

  /// Several SCEV types are sequences of the same arithmetic operator applied
  /// to constants and values that may be extended or truncated.
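  /// For example (illustrative), pushing (%a + %b + %c) emits the postfix
  /// sequence: %a, %b, DW_OP_plus, %c, DW_OP_plus.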
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          uint64_t DwarfOp) {
    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    bool Success = true;
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      Success &= pushSCEV(Op);

      if (EmitOperator >= 1)
        pushOperator(DwarfOp);
      ++EmitOperator;
    }
    return Success;
  }

  // TODO: Identify and omit noop casts.
  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const llvm::SCEV *Inner = C->getOperand(0);
    const llvm::Type *Type = C->getType();
    uint64_t ToWidth = Type->getIntegerBitWidth();
    bool Success = pushSCEV(Inner);
    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)
      pushOperator(Op);
    return Success;
  }

  // TODO: MinMax - although these haven't been encountered in the test suite.
  bool pushSCEV(const llvm::SCEV *S) {
    bool Success = true;
    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
      Success &= pushConst(StartInt);

    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      if (!U->getValue())
        return false;
      pushLocation(U->getValue());

    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);

    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);

    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
      // Assert if a new and unknown SCEVCastExpr type is encountered.
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));

    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);

    } else if (isa<SCEVAddRecExpr>(S)) {
      // Nested SCEVAddRecExpr are generated by nested loops and are currently
      // unsupported.
      return false;

    } else {
      return false;
    }
    return Success;
  }

  /// Return true if the combination of arithmetic operator and underlying
  /// SCEV constant value is an identity function.
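  /// For example, x + 0, x - 0, x * 1 and x / 1 all evaluate to x.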
  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
      if (C->getAPInt().getSignificantBits() > 64)
        return false;
      int64_t I = C->getAPInt().getSExtValue();
      switch (Op) {
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
        return I == 0;
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:
        return I == 1;
      }
    }
    return false;
  }

  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
  /// builder's expression stack. The stack should already contain an
  /// expression for the iteration count, so that it can be multiplied by
  /// the stride and added to the start.
  /// Components of the expression are omitted if they are an identity
  /// function. Chain (non-affine) SCEVs are not supported.
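  /// For example (illustrative), for the affine SCEV {2,+,4}<%L> with the
  /// iteration count already on the stack, this emits:
  ///   DW_OP_consts 4, DW_OP_mul, DW_OP_consts 2, DW_OP_plus
  /// which computes count * 4 + 2.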
  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    // TODO: Is this check needed?
    if (isa<SCEVAddRecExpr>(SAR.getStart()))
      return false;

    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_mul);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_plus);
    }
    return true;
  }

  /// Create an expression that is an offset from a value (usually the IV).
  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    DIExpression::appendOffset(Expr, Offset);
    LLVM_DEBUG(
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
  }

  /// Combine a translation of the SCEV and the IV to create an expression
  /// that recovers a location's value.
  /// Returns true if an expression was created.
  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    // SCEVs for SSA values are most frequently of the form
    // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
    // This is because %a is a PHI node that is not the IV. However, these
    // SCEVs have not been observed to result in debuginfo-lossy
    // optimisations, so it's not expected that this point will be reached.
    if (!isa<SCEVAddRecExpr>(S))
      return false;

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
                      << '\n');

    const auto *Rec = cast<SCEVAddRecExpr>(S);
    if (!Rec->isAffine())
      return false;

    if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return false;

    // Initialise a new builder with the iteration count expression. In
    // combination with the value's SCEV this enables recovery.
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))
      return false;

    return true;
  }

  /// Convert the SCEV of the loop's induction variable into a DIExpression
  /// that recovers the iteration count: the IV's location op should already
  /// be on the stack, and this subtracts the start and divides by the stride.
  /// Components of the expression are omitted if they are an identity
  /// function.
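  /// For example (illustrative), for an IV with SCEV {2,+,4}<%L> and with
  /// DW_OP_LLVM_arg 0 (the IV itself) already on the stack, this emits:
  ///   DW_OP_consts 2, DW_OP_minus, DW_OP_consts 4, DW_OP_div
  /// which computes the iteration count (IV - 2) / 4.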
  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
      return false;
    }
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_minus);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_div);
    }
    return true;
  }

  // Append the current expression and locations to a location list and an
  // expression list. Modify the DW_OP_LLVM_arg indexes to account for
  // the locations already present in the destination list.
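  // For example (illustrative), if DestLocations is {%iv, %a} and this
  // builder's LocationOps is {%iv, %b}, a DW_OP_LLVM_arg 1 in this
  // expression is rewritten to DW_OP_LLVM_arg 2 as it is appended, and %b
  // becomes the third entry in DestLocations.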
  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
    assert(!DestLocations.empty() &&
           "Expected the locations vector to contain the IV");
    // The DWARF_OP_LLVM_arg arguments of the expression being appended must
    // be modified to account for the locations already in the destination
    // vector. All builders contain the IV as the first location op.
    assert(!LocationOps.empty() &&
           "Expected the location ops to contain the IV.");
    // DestIndexMap[n] contains the index in DestLocations for the nth
    // location in this SCEVDbgValueBuilder.
    SmallVector<uint64_t, 2> DestIndexMap;
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        // Location already exists in DestLocations, reuse existing ArgIndex.
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
        continue;
      }
      // Location is not in DestLocations, add it.
      DestIndexMap.push_back(DestLocations.size());
      DestLocations.push_back(Op);
    }

    for (const auto &Op : expr_ops()) {
      if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
        Op.appendToVector(DestExpr);
        continue;
      }

      DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
      // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
      // DestIndexMap[n] contains its new index in DestLocations.
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
      DestExpr.push_back(NewIndex);
    }
  }
};

/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
/// and DIExpression.
struct DVIRecoveryRec {
  DVIRecoveryRec(DbgValueInst *DbgValue)
      : DbgRef(DbgValue), Expr(DbgValue->getExpression()),
        HadLocationArgList(false) {}
  DVIRecoveryRec(DbgVariableRecord *DVR)
      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

  PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
  DIExpression *Expr;
  bool HadLocationArgList;
  SmallVector<WeakVH, 2> LocationOps;
  SmallVector<const llvm::SCEV *, 2> SCEVs;
  SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;

  void clear() {
    for (auto &RE : RecoveryExprs)
      RE.reset();
    RecoveryExprs.clear();
  }

  ~DVIRecoveryRec() { clear(); }
};
} // namespace

/// Returns the total number of DW_OP_LLVM_arg operands in the expression.
/// This helps in determining if a DIArgList is necessary or can be omitted
/// from the dbg.value.
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
  auto expr_ops = ToDwarfOpIter(Expr);
  unsigned Count = 0;
  for (auto Op : expr_ops)
    if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
      Count++;
  return Count;
}

/// Overwrites DVI with the location and Ops as the DIExpression. This will
/// create an invalid expression if Ops has any dwarf::DW_OP_LLVM_arg
/// operands, because a DIArgList is not created for the first argument of
/// the dbg.value.
template <typename T>
static void updateDVIWithLocation(T &DbgVal, Value *Location,
                                  SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
                                    "contain any DW_OP_llvm_arg operands.");
  DbgVal.setRawLocation(ValueAsMetadata::get(Location));
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

/// Overwrite DVI with locations placed into a DIArgList.
template <typename T>
static void updateDVIWithLocations(T &DbgVal,
                                   SmallVectorImpl<Value *> &Locations,
                                   SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) != 0 &&
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  SmallVector<ValueAsMetadata *, 3> MetadataLocs;
  for (Value *V : Locations)
    MetadataLocs.push_back(ValueAsMetadata::get(V));
  auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
  DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

/// Write the new expression and new location ops for the dbg.value. If
/// possible, reduce the size of the dbg.value intrinsic by omitting the
/// DIArgList. It can be omitted if:
/// 1. There is only a single location, referenced by a single
///    DW_OP_LLVM_arg.
/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
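/// For example (illustrative), a dbg.value with locations DIArgList(%a) and
/// expression (DW_OP_LLVM_arg 0, DW_OP_plus_uconst 8, DW_OP_stack_value)
/// can be rewritten to refer to %a directly with the expression
/// (DW_OP_plus_uconst 8, DW_OP_stack_value).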
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
                               SmallVectorImpl<Value *> &NewLocationOps,
                               SmallVectorImpl<uint64_t> &NewExpr) {
  auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
    unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
    if (NumLLVMArgs == 0) {
      // Location assumed to be on the stack.
      updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
    } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
      // There is only a single DW_OP_llvm_arg at the start of the expression,
      // so it can be omitted along with DIArglist.
      assert(NewExpr[1] == 0 &&
             "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
      llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
      updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
    } else {
      // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
      updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
    }

    // If the DIExpression was previously empty then add the stack terminator.
    // Non-empty expressions have only had elements inserted into them and so
    // the terminator should already be present e.g. stack_value or fragment.
    DIExpression *SalvageExpr = DbgVal->getExpression();
    if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
      SalvageExpr =
          DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
      DbgVal->setExpression(SalvageExpr);
    }
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}

/// Cached location ops may be erased during LSR, in which case a poison is
/// required when restoring from the cache. The type of that location is no
/// longer available, so just use int8. The poison will be replaced by one or
/// more locations later when a SCEVDbgValueBuilder selects alternative
/// locations to use for the salvage.
static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
  return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));
}

/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased
/// values.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
    LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                      << "scev-salvage: post-LSR: " << *DbgVal << '\n');
    assert(DVIRec.Expr && "Expected an expression");
    DbgVal->setExpression(DVIRec.Expr);

    // Even a single location-op may be inside a DIArgList and referenced with
    // DW_OP_LLVM_arg, which is valid only with a DIArgList.
    if (!DVIRec.HadLocationArgList) {
      assert(DVIRec.LocationOps.size() == 1 &&
             "Unexpected number of location ops.");
      // LSR's unsuccessful salvage attempt may have added DIArgList, which in
      // this case was not present before, so force the location back to a
      // single uncontained Value.
      Value *CachedValue =
          getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
      DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
    } else {
      SmallVector<ValueAsMetadata *, 3> MetadataLocs;
      for (WeakVH VH : DVIRec.LocationOps) {
        Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
        MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
      }
      auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
      DbgVal->setRawLocation(
          llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
    }
    LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}

static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {

  if (isa<DbgValueInst *>(DVIRec.DbgRef)
          ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
          : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
    return false;

  // LSR may have caused several changes to the dbg.value in the failed
  // salvage attempt. So restore the DIExpression, the location ops and also
  // the location ops format, which is always DIArglist for multiple ops, but
  // only sometimes for a single op.
  restorePreTransformState(DVIRec);

  // LocationOpIndexMap[i] will store the post-LSR location index of
  // the non-optimised out location at pre-LSR index i.
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  SmallVector<Value *, 2> NewLocationOps;
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // Place the locations not optimised out in the list first, avoiding
    // inserts later. The map is used to update the DIExpression's
    // DW_OP_LLVM_arg arguments as the expression is updated.
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // It's possible that a value referred to in the SCEV may have been
    // optimised out by LSR.
    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
        SE.containsUndefs(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // Create an offset-based salvage expression if possible, as it requires
    // less DWARF ops than an iteration count-based expression.
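    // For example (illustrative), if the location's pre-LSR SCEV is
    // {6,+,4}<%L> and the induction variable's is {2,+,4}<%L>, the constant
    // difference is 4 and the location is recovered as (IV + 4).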
    if (std::optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
      else
        return false;
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // Merge the DbgValueBuilder generated expressions and the original
  // DIExpression, place the result into a new vector.
  SmallVector<uint64_t, 3> NewExpr;
  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // Most Ops needn't be updated.
    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
      Op.appendToVector(NewExpr);
      continue;
    }

    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
    // optimise it away. So just translate the argument to the updated
    // location index.
    if (!DbgBuilder) {
      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // The location has a recovery expression.
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
  else
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
  return true;
}

/// Obtain an expression for the iteration count, then attempt to salvage the
/// dbg.value intrinsics.
static void DbgRewriteSalvageableDVIs(
    llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())
    return;

  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

  if (const SCEVAddRecExpr *IVAddRec =
          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
    if (!IVAddRec->isAffine())
      return;

    // Prevent translation using excessive resources.
    if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return;

    // The iteration count is required to recover location values.
    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
      return;

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
                      << '\n');

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
                 IterCountExpr);
    }
  }
}

/// Identify and cache salvageable DVI locations and expressions along with
/// the corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
        // Ensure that if any location op is undef that the dbg.value is not
        // cached.
        if (DbgVal->isKillLocation())
          return false;

        // Check that the location op SCEVs are suitable for translation to
        // a DIExpression.
        const auto &HasTranslatableLocationOps =
            [&](const auto *DbgValToTranslate) -> bool {
          for (const auto LocOp : DbgValToTranslate->location_ops()) {
            if (!LocOp)
              return false;

            if (!SE.isSCEVable(LocOp->getType()))
              return false;

            const SCEV *S = SE.getSCEV(LocOp);
            if (SE.containsUndefs(S))
              return false;
          }
          return true;
        };

        if (!HasTranslatableLocationOps(DbgVal))
          return false;

        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(DbgVal);
        // Each location Op may need a SCEVDbgValueBuilder in order to recover
        // it. Pre-allocating a vector will enable quick lookups of the
        // builder later during the salvage.
        NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
        for (const auto LocOp : DbgVal->location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal->hasArgList();
        }
        SalvageableDVISCEVs.push_back(std::move(NewRec));
        return true;
      };
      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
        if (DVR.isDbgValue() || DVR.isDbgAssign())
          ProcessDbgValue(&DVR);
      }
      auto DVI = dyn_cast<DbgValueInst>(&I);
      if (!DVI)
        continue;
      if (ProcessDbgValue(DVI))
        DVIHandles.insert(DVI);
    }
  }
}

/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
/// any PHI from the loop header is usable, but may have less chance of
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {

  auto IsSuitableIV = [&](PHINode *P) {
    if (!SE.isSCEVable(P->getType()))
      return false;
    if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
      return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
    return false;
  };

  // For now, just pick the first IV that was generated and inserted by
  // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
  // by subsequent transforms.
  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (!IV)
      continue;

    // There should only be PHI node IVs.
    PHINode *P = cast<PHINode>(&*IV);

    if (IsSuitableIV(P))
      return P;
  }

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
      return &P;
  }
  return nullptr;
}

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {

  // Debug preservation - before we start removing anything, identify which
  // DVIs meet the salvageable criteria and store their DIExpression and
  // SCEVs.
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", false);
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    Rewriter.clear();
    if (numFolded) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }
  // LSR may at times remove all uses of an induction variable from a loop.
  // The only remaining use is the PHI in the exit block.
  // When this is the case, if the exit value of the IV can be calculated
  // using SCEV, we can replace the exit block PHI with the final value of the
  // IV and skip the updates in each loop iteration.
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", true);
    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter,
                                         &DT, UnusedIndVarInLoop, DeadInsts);
    Rewriter.clear();
    if (Rewrites) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRecSCEVs.
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}

PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
                          AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
Success
#define Success
Definition:AArch64Disassembler.cpp:220
for
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
Definition:AArch64ExpandPseudoInsts.cpp:115
APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition:ArchiveWriter.cpp:205
AssumptionCache.h
isEqual
static bool isEqual(const Function &Caller, const Function &Callee)
Definition:Attributes.cpp:2469
getParent
static const Function * getParent(const Value *V)
Definition:BasicAliasAnalysis.cpp:863
BasicBlockUtils.h
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Casting.h
CommandLine.h
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition:CommandLine.h:686
Compiler.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition:Compiler.h:622
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
isCanonical
static bool isCanonical(const MDString *S)
Definition:DebugInfoMetadata.cpp:386
DebugInfoMetadata.h
Debug.h
LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition:Debug.h:106
DenseMap.h
This file defines the DenseMap class.
DenseSet.h
This file defines the DenseSet and SmallDenseSet classes.
DerivedTypes.h
DomTreeUpdater.h
Dominators.h
Dwarf.h
This file contains constants used for implementing Dwarf debug support.
Other
std::optional< std::vector< StOtherPiece > > Other
Definition:ELFYAML.cpp:1315
End
bool End
Definition:ELF_riscv.cpp:480
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
GlobalValue.h
Hashing.h
Loops
Hexagon Hardware Loops
Definition:HexagonHardwareLoops.cpp:373
IRBuilder.h
BasicBlock.h
Constant.h
Instruction.h
IntrinsicInst.h
Module.h
Module.h This file contains the declarations for the Module class.
Operator.h
Type.h
Use.h
This defines the Use class.
User.h
Value.h
Users
iv Induction Variable Users
Definition:IVUsers.cpp:48
IVUsers.h
InitializePasses.h
InstrTypes.h
Instructions.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition:Lint.cpp:557
LoopAnalysisManager.h
This header provides classes for managing per-loop analyses.
LoopInfo.h
LoopPass.h
SalvageDVI
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
Definition:LoopStrengthReduce.cpp:6818
DropScaledForVScale
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
getWideOperand
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
Definition:LoopStrengthReduce.cpp:2983
isAddSExtable
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
Definition:LoopStrengthReduce.cpp:799
mayUsePostIncMode
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
Definition:LoopStrengthReduce.cpp:3893
restorePreTransformState
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
Definition:LoopStrengthReduce.cpp:6782
ExtractImmediate
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
Definition:LoopStrengthReduce.cpp:924
containsAddRecDependentOnLoop
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
Definition:LoopStrengthReduce.cpp:614
findIVOperand
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
Definition:LoopStrengthReduce.cpp:2964
isLegalUse
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
Definition:LoopStrengthReduce.cpp:1917
isMulSExtable
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
Definition:LoopStrengthReduce.cpp:807
MaxSCEVSalvageExpressionSize
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
Definition:LoopStrengthReduce.cpp:144
isExistingPhi
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
Definition:LoopStrengthReduce.cpp:1082
getScalingFactorCost
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
Definition:LoopStrengthReduce.cpp:1963
InsnsCost
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
StressIVChain
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
isAddressUse
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
Definition:LoopStrengthReduce.cpp:984
ExtractSymbol
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
Definition:LoopStrengthReduce.cpp:958
updateDVIWithLocation
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
Definition:LoopStrengthReduce.cpp:6706
isLegalAddImmediate
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
Definition:LoopStrengthReduce.cpp:1938
AllowDropSolutionIfLessProfitable
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
EnableVScaleImmediates
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
PreferredAddresingMode
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
getExprBase
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
Definition:LoopStrengthReduce.cpp:2999
isAlwaysFoldable
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
Definition:LoopStrengthReduce.cpp:2008
GetInductionVariable
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
Definition:LoopStrengthReduce.cpp:7032
IsSimplerBaseSCEVForTarget
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
Definition:LoopStrengthReduce.cpp:5307
reduce
loop reduce
Definition:LoopStrengthReduce.cpp:7190
MaxIVUsers
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
Definition:LoopStrengthReduce.cpp:138
isHighCostExpansion
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
Definition:LoopStrengthReduce.cpp:1102
getValueOrPoison
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
Definition:LoopStrengthReduce.cpp:6777
getAccessType
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
Definition:LoopStrengthReduce.cpp:1029
numLLVMArgOps
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
Definition:LoopStrengthReduce.cpp:6693
DbgRewriteSalvageableDVIs
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
Definition:LoopStrengthReduce.cpp:6931
EnablePhiElim
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
DbgGatherSalvagableDVI
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
Definition:LoopStrengthReduce.cpp:6969
isAddRecSExtable
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
Definition:LoopStrengthReduce.cpp:791
canHoistIVInc
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
Definition:LoopStrengthReduce.cpp:6031
DoInitialMatch
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
Definition:LoopStrengthReduce.cpp:541
isAMCompletelyFolded
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
Definition:LoopStrengthReduce.cpp:1946
LSRExpNarrow
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
FilterSameScaledReg
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
updateDVIWithLocations
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
Definition:LoopStrengthReduce.cpp:6717
ComplexityLimit
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
UpdateDbgValueInst
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
Definition:LoopStrengthReduce.cpp:6736
ReduceLoopStrength
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
Definition:LoopStrengthReduce.cpp:7064
isProfitableChain
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
Definition:LoopStrengthReduce.cpp:3066
CollectSubexprs
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
Definition:LoopStrengthReduce.cpp:3835
getExactSDiv
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
Definition:LoopStrengthReduce.cpp:819
canFoldIVIncExpr
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
Definition:LoopStrengthReduce.cpp:3359
DEBUG_TYPE
#define DEBUG_TYPE
Definition:LoopStrengthReduce.cpp:132
getAnyExtendConsideringPostIncUses
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
Definition:LoopStrengthReduce.cpp:4400
getSetupCost
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
Definition:LoopStrengthReduce.cpp:1392
SetupCostDepthLimit
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
LoopStrengthReduce.h
LoopUtils.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
G
#define G(x, y, z)
Definition:MD5.cpp:56
Reg
unsigned Reg
Definition:MachineSink.cpp:2028
MathExtras.h
MemorySSAUpdater.h
MemorySSA.h
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
II
uint64_t IntrinsicInst * II
Definition:NVVMIntrRange.cpp:51
P
#define P(N)
Fixup
PowerPC TLS Dynamic Call Fixup
Definition:PPCTLSDynamicCall.cpp:339
INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition:PassSupport.h:55
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:57
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:52
Pass.h
PointerIntPair.h
This file defines the PointerIntPair class.
Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition:RISCVRedundantCopyElimination.cpp:75
Uses
Remove Loads Into Fake Uses
Definition:RemoveLoadsIntoFakeUses.cpp:75
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition:RustDemangle.cpp:181
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
RA
SI optimize exec mask operations pre RA
Definition:SIOptimizeExecMaskingPreRA.cpp:71
Address
@ Address
Definition:SPIRVEmitNonSemanticDI.cpp:68
STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.
OS
raw_pwrite_stream & OS
Definition:SampleProfWriter.cpp:51
ScalarEvolutionExpander.h
ScalarEvolutionExpressions.h
ScalarEvolutionNormalization.h
ScalarEvolution.h
Scalar.h
SetVector.h
This file implements a set that has insertion order iteration characteristics.
SmallBitVector.h
This file implements the SmallBitVector class.
SmallPtrSet.h
This file defines the SmallPtrSet class.
SmallSet.h
This file defines the SmallSet class.
SmallVector.h
This file defines the SmallVector class.
Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition:TapiFile.cpp:39
TargetLibraryInfo.h
TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.
Local.h
Utils.h
ValueHandle.h
ValueTracking.h
Rewriter
Virtual Register Rewriter
Definition:VirtRegMap.cpp:261
RHS
Value * RHS
Definition:X86PartialReduction.cpp:74
LHS
Value * LHS
Definition:X86PartialReduction.cpp:73
Mul
BinaryOperator * Mul
Definition:X86PartialReduction.cpp:68
IV
static const uint32_t IV[8]
Definition:blake3_impl.h:78
LiveDebugValues::DbgValue
Class recording the (high level) value of a variable.
Definition:InstrRefBasedImpl.h:512
NewExpr
Definition:ItaniumDemangle.h:2103
const_iterator
T
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition:APInt.h:1520
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition:APInt.h:329
llvm::APInt::sdiv
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition:APInt.cpp:1618
llvm::APInt::getSignificantBits
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition:APInt.h:1511
llvm::APInt::srem
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition:APInt.cpp:1710
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition:APInt.h:1542
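The APInt helpers indexed above (srem, sdiv, getSignificantBits, getSExtValue) combine naturally into an exact-division check. A minimal sketch, assuming only the public APInt API:

#include "llvm/ADT/APInt.h"
#include <optional>
using namespace llvm;

// Divide LHS by RHS only when the remainder is known to be zero.
static std::optional<APInt> exactSDiv(const APInt &LHS, const APInt &RHS) {
  if (RHS == 0 || !LHS.srem(RHS).isZero())
    return std::nullopt;
  return LHS.sdiv(RHS);
}

// getSExtValue() asserts the value fits in 64 bits, so guard it first.
static std::optional<int64_t> toInt64(const APInt &V) {
  if (V.getSignificantBits() > 64)
    return std::nullopt;
  return V.getSExtValue();
}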
llvm::AddOperator
Definition:Operator.h:405
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition:PassManager.h:253
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition:PassManager.h:410
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition:PassAnalysisSupport.h:47
llvm::AnalysisUsage::addRequiredID
AnalysisUsage & addRequiredID(const void *ID)
Definition:Pass.cpp:270
llvm::AnalysisUsage::addPreservedID
AnalysisUsage & addPreservedID(const void *ID)
Definition:PassAnalysisSupport.h:88
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition:PassAnalysisSupport.h:75
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition:PassAnalysisSupport.h:98
llvm::Any
Definition:Any.h:28
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::AssertingVH
Value handle that asserts if the Value is deleted.
Definition:ValueHandle.h:264
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition:AssumptionCache.h:204
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition:AssumptionCache.h:42
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition:Instructions.h:501
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition:Instructions.h:704
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition:BasicBlock.h:530
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition:BasicBlock.h:177
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition:BasicBlock.h:389
llvm::BasicBlock::isLandingPad
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition:BasicBlock.cpp:699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition:BasicBlock.h:240
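A small sketch tying the BasicBlock accessors above together (phis, getTerminator), under the usual assumption that the block is well formed:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

static bool endsInConditionalBranch(const BasicBlock &BB) {
  for (const PHINode &PN : BB.phis())
    (void)PN; // PHIs precede all other instructions; inspect them here.
  const auto *BI = dyn_cast_or_null<BranchInst>(BB.getTerminator());
  return BI && !BI->isUnconditional();
}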
llvm::BinaryOperator
Definition:InstrTypes.h:170
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition:InstrTypes.h:370
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition:Instructions.cpp:2639
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition:Instructions.h:3016
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition:Instructions.h:3089
llvm::BranchInst::getCondition
Value * getCondition() const
Definition:Instructions.h:3092
llvm::CastInst::getCastOpcode
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Definition:Instructions.cpp:3144
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition:Instructions.cpp:2972
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition:InstrTypes.h:673
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition:InstrTypes.h:694
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition:InstrTypes.h:695
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition:InstrTypes.h:787
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition:Constants.h:83
llvm::ConstantInt::isValueValidForType
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
Definition:Constants.cpp:1597
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition:Constants.h:126
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition:Constants.h:163
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition:Constants.h:157
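A minimal sketch of the ConstantInt helpers above, guarding getSigned with isValueValidForType (this relies on the signed overload of isValueValidForType, a companion of the unsigned one indexed above):

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Returns null when V cannot be represented in Ty.
static ConstantInt *makeSignedConstant(IntegerType *Ty, int64_t V) {
  if (!ConstantInt::isValueValidForType(Ty, V))
    return nullptr;
  return ConstantInt::getSigned(Ty, V);
}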
llvm::Constant
This is an important base class in LLVM.
Definition:Constant.h:42
llvm::DIArgList::get
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
Definition:DebugInfoMetadata.cpp:2315
llvm::DIExpression::expr_op_iterator
An iterator for expression operands.
Definition:DebugInfoMetadata.h:2851
llvm::DIExpression
DWARF expression.
Definition:DebugInfoMetadata.h:2763
llvm::DIExpression::append
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
Definition:DebugInfoMetadata.cpp:1948
llvm::DIExpression::appendOffset
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
Definition:DebugInfoMetadata.cpp:1721
llvm::DIExpression::isComplex
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
Definition:DebugInfoMetadata.cpp:1540
llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition:DWARFExpression.h:32
llvm::DataLayout
A parsed version of the target data layout string, with methods for querying it.
Definition:DataLayout.h:63
llvm::DbgValueInst
This represents the llvm.dbg.value instruction.
Definition:IntrinsicInst.h:468
llvm::DbgVariableRecord
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Definition:DebugProgramInstruction.h:270
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition:DenseMap.h:211
llvm::DenseMapBase::clear
void clear()
Definition:DenseMap.h:110
llvm::DenseMap
Definition:DenseMap.h:727
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition:DenseSet.h:278
llvm::DomTreeNodeBase< BasicBlock >
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition:GenericDomTree.h:89
llvm::DomTreeUpdater
Definition:DomTreeUpdater.h:30
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition:GenericDomTree.h:401
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition:GenericDomTree.h:443
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition:Dominators.h:317
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition:Dominators.h:162
llvm::DominatorTree::findNearestCommonDominator
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition:Dominators.cpp:344
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition:Dominators.cpp:122
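A sketch of the dominance queries above, assuming a DominatorTree has already been computed for the enclosing function:

#include "llvm/IR/Dominators.h"
using namespace llvm;

// Pick a point at which a value depending on both I1 and I2 could be
// materialized: their nearest common dominator (may be null).
static Instruction *commonInsertPoint(DominatorTree &DT, Instruction *I1,
                                      Instruction *I2) {
  return DT.findNearestCommonDominator(I1, I2);
}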
llvm::GlobalValue
Definition:GlobalValue.h:48
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition:Instructions.h:1158
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition:IRBuilder.h:2705
llvm::IVStrideUse
IVStrideUse - Keep track of one use of a strided induction variable.
Definition:IVUsers.h:35
llvm::IVStrideUse::transformToPostInc
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Definition:IVUsers.cpp:367
llvm::IVStrideUse::getOperandValToReplace
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
Definition:IVUsers.h:54
llvm::IVStrideUse::setUser
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Definition:IVUsers.h:48
llvm::IVUsersAnalysis
Analysis pass that exposes the IVUsers for a loop.
Definition:IVUsers.h:184
llvm::IVUsersWrapperPass
Definition:IVUsers.h:163
llvm::IVUsers
Definition:IVUsers.h:91
llvm::IVUsers::const_iterator
ilist< IVStrideUse >::const_iterator const_iterator
Definition:IVUsers.h:142
llvm::IVUsers::empty
bool empty() const
Definition:IVUsers.h:147
llvm::Init
Definition:Record.h:285
llvm::InstructionCost
Definition:InstructionCost.h:29
llvm::InstructionCost::print
void print(raw_ostream &OS) const
Definition:InstructionCost.cpp:19
llvm::InstructionCost::getValue
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition:InstructionCost.h:87
llvm::InstructionCost::isValid
bool isValid() const
Definition:InstructionCost.h:79
llvm::Instruction
Definition:Instruction.h:68
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
Definition:Instruction.cpp:1275
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition:Instruction.h:511
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition:Instruction.h:869
llvm::Instruction::eraseFromParent
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition:Instruction.cpp:94
llvm::Instruction::getAccessType
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
Definition:Instruction.cpp:1100
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition:Instruction.h:310
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition:Instruction.h:508
llvm::Instruction::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition:Instruction.cpp:76
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition:Instruction.cpp:175
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition:Type.cpp:311
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition:IntrinsicInst.h:48
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition:LLVMContext.h:67
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition:LoopPassManager.h:229
llvm::LPPassManager
Definition:LoopPass.h:76
llvm::LoadInst
An instruction for reading from memory.
Definition:Instructions.h:176
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition:GenericLoopInfoImpl.h:33
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition:GenericLoopInfo.h:90
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition:GenericLoopInfo.h:82
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition:LoopInfo.h:593
llvm::LoopInfo
Definition:LoopInfo.h:407
llvm::LoopPass
Definition:LoopPass.h:28
llvm::LoopPass::runOnLoop
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
llvm::LoopStrengthReducePass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition:LoopStrengthReduce.cpp:7167
llvm::Loop
Represents a single loop in the control flow graph.
Definition:LoopInfo.h:39
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition:MemorySSA.h:928
llvm::MemorySSAUpdater
Definition:MemorySSAUpdater.h:54
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition:MemorySSA.h:985
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition:MemorySSA.h:701
llvm::PHINode
Definition:Instructions.h:2600
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition:Instructions.h:2735
llvm::PHINode::blocks
iterator_range< const_block_iterator > blocks() const
Definition:Instructions.h:2661
llvm::PHINode::incoming_values
op_range incoming_values()
Definition:Instructions.h:2665
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition:Instructions.h:2678
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition:Instructions.h:2695
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
Definition:Instructions.h:2675
llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition:Instructions.h:2689
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition:Instructions.h:2768
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition:Instructions.h:2671
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition:Instructions.h:2635
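A minimal sketch wiring up a two-input PHI with the creation and mutation APIs above; the block and value names are assumptions for illustration:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static PHINode *makeTwoInputPhi(Type *Ty, BasicBlock *Header,
                                BasicBlock *Preheader, BasicBlock *Latch,
                                Value *Start, Value *Next) {
  // Reserve two incoming edges up front; insert at the top of Header.
  PHINode *PN = PHINode::Create(Ty, /*NumReservedValues=*/2, "iv",
                                Header->begin());
  PN->addIncoming(Start, Preheader);
  PN->addIncoming(Next, Latch);
  return PN;
}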
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition:PassRegistry.cpp:24
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition:Pass.h:94
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition:Pass.cpp:98
llvm::PointerIntPair
PointerIntPair - This class implements a pair of a pointer and small integer.
Definition:PointerIntPair.h:80
llvm::PointerUnion
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
Definition:PointerUnion.h:118
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition:Constants.cpp:1878
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition:Analysis.h:111
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition:Analysis.h:117
llvm::SCEVAddExpr
This node represents an addition of some number of SCEVs.
Definition:ScalarEvolutionExpressions.h:266
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition:ScalarEvolutionExpressions.h:347
llvm::SCEVAddRecExpr::getType
Type * getType() const
Definition:ScalarEvolutionExpressions.h:357
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition:ScalarEvolutionExpressions.h:358
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition:ScalarEvolutionExpressions.h:365
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition:ScalarEvolutionExpressions.h:375
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition:ScalarEvolutionExpressions.h:359
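A sketch of the typical add-rec decomposition these accessors support (isAffine, getStart, getStepRecurrence, getLoop):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Split an affine {Start,+,Step}<L> recurrence into its parts.
static bool splitAffine(const SCEV *S, ScalarEvolution &SE,
                        const SCEV *&Start, const SCEV *&Step,
                        const Loop *&L) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR || !AR->isAffine())
    return false;
  Start = AR->getStart();
  Step = AR->getStepRecurrence(SE);
  L = AR->getLoop();
  return true;
}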
llvm::SCEVCastExpr
This is the base class for unary cast operator classes.
Definition:ScalarEvolutionExpressions.h:103
llvm::SCEVCommutativeExpr
This node is the base class for n'ary commutative operators.
Definition:ScalarEvolutionExpressions.h:247
llvm::SCEVConstant
This class represents a constant integer value.
Definition:ScalarEvolutionExpressions.h:60
llvm::SCEVConstant::getType
Type * getType() const
Definition:ScalarEvolutionExpressions.h:72
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition:ScalarEvolutionExpressions.h:69
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition:ScalarEvolutionExpressions.h:70
llvm::SCEVExpander
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Definition:ScalarEvolutionExpander.h:63
llvm::SCEVIntegralCastExpr
This is the base class for unary integral cast operator classes.
Definition:ScalarEvolutionExpressions.h:141
llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition:ScalarEvolutionExpressions.h:290
llvm::SCEVNAryExpr
This node is a base class providing common functionality for n'ary operators.
Definition:ScalarEvolutionExpressions.h:196
llvm::SCEVNAryExpr::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition:ScalarEvolutionExpressions.h:226
llvm::SCEVNAryExpr::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition:ScalarEvolutionExpressions.h:230
llvm::SCEVNAryExpr::operands
ArrayRef< const SCEV * > operands() const
Definition:ScalarEvolutionExpressions.h:218
llvm::SCEVSMaxExpr
This class represents a signed maximum selection.
Definition:ScalarEvolutionExpressions.h:464
llvm::SCEVUDivExpr
This class represents a binary unsigned division operation.
Definition:ScalarEvolutionExpressions.h:304
llvm::SCEVUMaxExpr
This class represents an unsigned maximum selection.
Definition:ScalarEvolutionExpressions.h:476
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition:ScalarEvolutionExpressions.h:577
llvm::SCEV
This class represents an analyzed expression in the program.
Definition:ScalarEvolution.h:71
llvm::SCEV::operands
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
Definition:ScalarEvolution.cpp:420
llvm::SCEV::getExpressionSize
unsigned short getExpressionSize() const
Definition:ScalarEvolution.h:169
llvm::SCEV::isZero
bool isZero() const
Return true if the expression is a constant zero.
Definition:ScalarEvolution.cpp:448
llvm::SCEV::getSCEVType
SCEVTypes getSCEVType() const
Definition:ScalarEvolution.h:140
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition:ScalarEvolution.cpp:386
llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition:ScalarEvolution.h:127
llvm::SIToFPInst
This class represents a cast from signed integer to floating point.
Definition:Instructions.h:4723
llvm::ScalarEvolutionWrapperPass
Definition:ScalarEvolution.h:2352
llvm::ScalarEvolution
The main scalar evolution driver.
Definition:ScalarEvolution.h:447
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition:ScalarEvolution.cpp:8350
llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition:ScalarEvolution.h:653
llvm::ScalarEvolution::getTypeSizeInBits
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
Definition:ScalarEvolution.cpp:4448
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition:ScalarEvolution.cpp:473
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition:ScalarEvolution.cpp:4547
llvm::ScalarEvolution::getNoopOrSignExtend
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition:ScalarEvolution.cpp:4742
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition:ScalarEvolution.cpp:14100
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition:ScalarEvolution.cpp:3641
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition:ScalarEvolution.cpp:4441
llvm::ScalarEvolution::getEffectiveSCEVType
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
Definition:ScalarEvolution.cpp:4458
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition:ScalarEvolution.cpp:4655
llvm::ScalarEvolution::getAnyExtendExpr
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
Definition:ScalarEvolution.cpp:2180
llvm::ScalarEvolution::containsUndefs
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
Definition:ScalarEvolution.cpp:13577
llvm::ScalarEvolution::getSignExtendExpr
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition:ScalarEvolution.cpp:1900
llvm::ScalarEvolution::getVScale
const SCEV * getVScale(Type *Ty)
Definition:ScalarEvolution.cpp:494
llvm::ScalarEvolution::hasComputableLoopEvolution
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
Definition:ScalarEvolution.cpp:14104
llvm::ScalarEvolution::getPointerBase
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
Definition:ScalarEvolution.cpp:4823
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition:ScalarEvolution.cpp:3106
llvm::ScalarEvolution::getUnknown
const SCEV * getUnknown(Value *V)
Definition:ScalarEvolution.cpp:4411
llvm::ScalarEvolution::computeConstantDifference
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
Definition:ScalarEvolution.cpp:12059
llvm::ScalarEvolution::properlyDominates
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
Definition:ScalarEvolution.cpp:14187
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition:ScalarEvolution.cpp:2526
llvm::ScalarEvolution::containsErasedValue
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
Definition:ScalarEvolution.cpp:13586
llvm::ScalarEvolution::getContext
LLVMContext & getContext() const
Definition:ScalarEvolution.h:489
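A sketch of a common ScalarEvolution idiom built from the queries above: check that the values are analyzable, then form LHS - RHS as a SCEV expression:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static const SCEV *differenceOrNull(ScalarEvolution &SE, Value *A, Value *B) {
  if (!SE.isSCEVable(A->getType()) || A->getType() != B->getType())
    return nullptr;
  return SE.getMinusSCEV(SE.getSCEV(A), SE.getSCEV(B));
}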
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition:Instructions.h:1657
llvm::SetVector
A vector that has set insertion semantics.
Definition:SetVector.h:57
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition:SetVector.h:98
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition:SetVector.h:113
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition:SetVector.h:103
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition:SetVector.h:162
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition:SmallBitVector.h:35
llvm::SmallBitVector::find_first
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition:SmallBitVector.h:230
llvm::SmallBitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition:SmallBitVector.h:183
llvm::SmallBitVector::find_next
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition:SmallBitVector.h:277
llvm::SmallBitVector::size
size_type size() const
Returns the number of bits in this bitvector.
Definition:SmallBitVector.h:195
llvm::SmallBitVector::resize
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
Definition:SmallBitVector.h:332
llvm::SmallBitVector::count
size_type count() const
Returns the number of bits which are set.
Definition:SmallBitVector.h:200
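The find_first/find_next pair above supports the usual set-bit iteration idiom; a minimal sketch:

#include "llvm/ADT/SmallBitVector.h"
using namespace llvm;

static unsigned sumOfSetIndices(const SmallBitVector &BV) {
  unsigned Sum = 0;
  for (int I = BV.find_first(); I != -1; I = BV.find_next(I))
    Sum += unsigned(I);
  return Sum;
}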
llvm::SmallPtrSetImplBase::clear
void clear()
Definition:SmallPtrSet.h:97
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition:SmallPtrSet.h:363
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition:SmallPtrSet.h:452
llvm::SmallPtrSetImpl::end
iterator end() const
Definition:SmallPtrSet.h:477
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition:SmallPtrSet.h:384
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition:SmallPtrSet.h:472
llvm::SmallPtrSet< const Loop *, 2 >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition:SetVector.h:370
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition:SmallSet.h:132
llvm::SmallSet::clear
void clear()
Definition:SmallSet.h:204
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition:SmallSet.h:181
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition:SmallVector.h:673
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition:SmallVector.h:704
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition:SmallVector.h:663
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition:SmallVector.h:737
llvm::SmallVectorImpl::const_iterator
typename SuperClass::const_iterator const_iterator
Definition:SmallVector.h:578
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition:SmallVector.h:805
llvm::SmallVectorImpl::clear
void clear()
Definition:SmallVector.h:610
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition:SmallVector.h:577
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition:SmallVector.h:638
llvm::SmallVectorTemplateBase::pop_back
void pop_back()
Definition:SmallVector.h:425
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVectorTemplateCommon::end
iterator end()
Definition:SmallVector.h:269
llvm::SmallVectorTemplateCommon::front
reference front()
Definition:SmallVector.h:299
llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition:SmallVector.h:267
llvm::SmallVectorTemplateCommon::back
reference back()
Definition:SmallVector.h:308
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
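A sketch of the worklist idiom built from the SmallVector operations above (empty, pop_back_val, push_back):

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static unsigned drainWorklist(SmallVector<unsigned, 8> Worklist) {
  unsigned Processed = 0;
  while (!Worklist.empty()) {
    unsigned Item = Worklist.pop_back_val();
    ++Processed;
    if (Item > 0)
      Worklist.push_back(Item - 1); // enqueue derived work
  }
  return Processed;
}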
llvm::StackOffset::get
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition:TypeSize.h:44
llvm::StoreInst
An instruction for storing to memory.
Definition:Instructions.h:292
llvm::TargetLibraryInfoWrapperPass
Definition:TargetLibraryInfo.h:639
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition:TargetLibraryInfo.h:280
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition:TargetTransformInfo.h:3250
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition:TargetTransformInfo.h:212
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition:TargetTransformInfo.cpp:1251
llvm::TargetTransformInfo::shouldDropLSRSolutionIfLessProfitable
bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
Definition:TargetTransformInfo.cpp:441
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Definition:TargetTransformInfo.cpp:432
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition:TargetTransformInfo.cpp:445
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition:TargetTransformInfo.cpp:569
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition:TargetTransformInfo.cpp:1307
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition:TargetTransformInfo.cpp:767
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition:TargetTransformInfo.cpp:422
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition:TargetTransformInfo.cpp:1302
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition:TargetTransformInfo.cpp:418
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition:TargetTransformInfo.cpp:583
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition:TargetTransformInfo.cpp:410
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Definition:TargetTransformInfo.cpp:453
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition:TargetTransformInfo.cpp:759
llvm::TargetTransformInfo::isLegalAddScalableImmediate
bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
Definition:TargetTransformInfo.cpp:414
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
Definition:TargetTransformInfo.cpp:437
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition:TargetTransformInfo.h:1700
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition:TargetTransformInfo.cpp:449
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition:TargetTransformInfo.h:780
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition:TargetTransformInfo.h:782
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition:TargetTransformInfo.h:781
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition:TargetTransformInfo.h:783
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition:TargetTransformInfo.cpp:560
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition:TargetTransformInfo.cpp:573
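A sketch of the legality query LSR issues when costing an addressing mode of the form Base + Scale*Index; the concrete operand choices here are illustrative assumptions:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static bool scaledAddrModeIsLegal(const TargetTransformInfo &TTI,
                                  Type *AccessTy, int64_t BaseOffset,
                                  int64_t Scale) {
  return TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr, BaseOffset,
                                   /*HasBaseReg=*/true, Scale);
}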
llvm::TruncInst
This class represents a truncation of integer types.
Definition:Instructions.h:4503
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition:Type.h:264
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
llvm::Type::getFPMantissaWidth
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::UIToFPInst
This class represents a cast from unsigned integer to floating point.
Definition:Instructions.h:4692
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition:Use.h:43
llvm::User
Definition:User.h:44
llvm::User::operands
op_range operands()
Definition:User.h:288
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition:User.cpp:21
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition:User.h:233
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition:User.h:228
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition:User.h:250
llvm::User::op_end
op_iterator op_end()
Definition:User.h:282
llvm::ValueAsMetadata::get
static ValueAsMetadata * get(Value *V)
Definition:Metadata.cpp:501
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition:Value.h:434
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition:Value.cpp:534
llvm::Value::users
iterator_range< user_iterator > users()
Definition:Value.h:421
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition:Value.cpp:1075
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition:Value.h:376
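A sketch combining the Value use-tracking APIs above into a conservative single-use replacement:

#include "llvm/IR/Value.h"
using namespace llvm;

static bool replaceIfSoleUse(Value *Old, Value *New) {
  // RAUW requires matching types; hasOneUse keeps the change local.
  if (!Old->hasOneUse() || Old->getType() != New->getType())
    return false;
  Old->replaceAllUsesWith(New);
  return true;
}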
llvm::WeakVH
A nullable Value handle.
Definition:ValueHandle.h:144
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition:CommandLine.h:399
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition:DenseSet.h:213
llvm::detail::DenseSetImpl::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition:DenseSet.h:95
llvm::details::FixedOrScalableQuantity
Definition:TypeSize.h:88
llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition:ilist_node.h:32
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition:ilist_node.h:132
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition:iterator_range.h:42
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition:raw_ostream.h:52
uint64_t
unsigned
iterator_range.h
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
false
Definition:StackSlotColoring.cpp:193
llvm::AArch64CC::NE
@ NE
Definition:AArch64BaseInfo.h:256
llvm::AArch64::Fixups
Fixups
Definition:AArch64FixupKinds.h:17
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition:AMDGPUMetadata.h:487
llvm::AMDGPU::Imm
@ Imm
Definition:AMDGPURegBankLegalizeRules.h:105
llvm::ARM_AM::add
@ add
Definition:ARMAddressingModes.h:39
llvm::ARM::ProfileKind::M
@ M
llvm::COFF::Entry
@ Entry
Definition:COFF.h:844
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition:CallingConv.h:24
llvm::M68k::MemAddrModeKind::U
@ U
llvm::M68k::MemAddrModeKind::V
@ V
llvm::M68k::MemAddrModeKind::u
@ u
llvm::M68k::MemAddrModeKind::f
@ f
llvm::M68k::MemAddrModeKind::K
@ K
llvm::M68k::MemAddrModeKind::L
@ L
llvm::MCID::RegSequence
@ RegSequence
Definition:MCInstrDesc.h:182
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition:PPCISelLowering.h:429
llvm::RISCVFenceField::R
@ R
Definition:RISCVBaseInfo.h:373
llvm::RISCVFenceField::O
@ O
Definition:RISCVBaseInfo.h:372
llvm::SPII::Store
@ Store
Definition:SparcInstrInfo.h:33
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition:X86DisassemblerDecoder.h:621
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition:CommandLine.h:711
llvm::cl::BOU_FALSE
@ BOU_FALSE
Definition:CommandLine.h:637
llvm::cl::BOU_UNSET
@ BOU_UNSET
Definition:CommandLine.h:637
llvm::cl::BOU_TRUE
@ BOU_TRUE
Definition:CommandLine.h:637
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::codeview::Basic
@ Basic
Definition:CodeView.h:152
llvm::dwarf::DW_OP_LLVM_arg
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
Definition:Dwarf.h:147
llvm::dwarf::DW_OP_LLVM_convert
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Definition:Dwarf.h:143
llvm::lltok::Kind
Kind
Definition:LLToken.h:18
llvm::logicalview::LVCompareKind::Types
@ Types
llvm::logicalview::LVAttributeKind::Inserted
@ Inserted
llvm::msgpack::Type::Map
@ Map
llvm::numbers::e
constexpr double e
Definition:MathExtras.h:47
llvm::objcarc::Sequence
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition:PtrState.h:41
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition:OptimizationRemarkEmitter.h:135
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition:RDFGraph.h:390
llvm::sampleprof::Base
@ Base
Definition:Discriminator.h:58
llvm::sys::path::begin
const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition:Path.cpp:226
llvm::sys::path::end
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition:Path.cpp:235
llvm::telemetry::KindType
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
Definition:Telemetry.h:77
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition:STLExtras.h:329
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition:SparseBitVector.h:877
llvm::Offset
@ Offset
Definition:DWP.cpp:480
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1759
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1739
llvm::PseudoProbeType::Block
@ Block
llvm::Depth
@ Depth
Definition:SIMachineScheduler.h:36
llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition:APInt.h:2082
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition:STLExtras.h:2115
llvm::LoopSimplifyID
char & LoopSimplifyID
Definition:LoopSimplify.cpp:784
llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition:AddressRanges.h:153
llvm::countr_zero
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition:bit.h:215
llvm::matchSimpleRecurrence
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
Definition:ValueTracking.cpp:9214
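A minimal sketch of the ValueTracking matcher above, recognizing a first-order recurrence of the form iv = phi(Start, iv op Step):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool isSimpleRecurrencePhi(const PHINode *PN) {
  BinaryOperator *BO = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  return matchSimpleRecurrence(PN, BO, Start, Step);
}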
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1746
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition:MathExtras.h:341
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition:BasicBlockUtils.cpp:164
llvm::reverse
auto reverse(ContainerTy &&C)
Definition:STLExtras.h:420
llvm::denormalizeForPostIncUse
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
Definition:ScalarEvolutionNormalization.cpp:120
llvm::get
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
Definition:PointerIntPair.h:270
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition:STLExtras.h:1664
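The range-based STLExtras helpers indexed here (all_of, any_of, find_if, llvm::sort) replace explicit begin/end pairs; a minimal sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static bool hasNegativeAfterSort(SmallVectorImpl<int> &Vals) {
  llvm::sort(Vals.begin(), Vals.end());
  return any_of(Vals, [](int V) { return V < 0; });
}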
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition:Debug.cpp:163
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1753
llvm::ConstantFoldCastOperand
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
Definition:ConstantFolding.cpp:1462
llvm::SplitLandingPadPredecessors
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Definition:BasicBlockUtils.cpp:1539
llvm::normalizeForPostIncUse
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
Definition:ScalarEvolutionNormalization.cpp:97
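A sketch of normalizing a SCEV to post-increment form for a single loop, using the helper above; PostIncLoopSet is the SmallPtrSet of loops these normalization functions take:

#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;

static const SCEV *toPostInc(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
  PostIncLoopSet Loops;
  Loops.insert(L);
  return normalizeForPostIncUse(S, Loops, SE);
}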
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition:raw_ostream.cpp:907
llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
llvm::RecurKind::Add
@ Add
Sum of integers.
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition:STLExtras.h:1938
llvm::createLoopStrengthReducePass
Pass * createLoopStrengthReducePass()
Definition:LoopStrengthReduce.cpp:7193
llvm::SplitCriticalEdge
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition:BreakCriticalEdges.cpp:101
llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition:Local.cpp:561
llvm::BitWidth
constexpr unsigned BitWidth
Definition:BitmaskEnum.h:217
llvm::PseudoProbeReservedId::Last
@ Last
llvm::formLCSSAForInstructions
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition:LCSSA.cpp:325
llvm::initializeLoopStrengthReducePass
void initializeLoopStrengthReducePass(PassRegistry &)
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition:LoopAnalysisManager.cpp:138
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1766
llvm::rewriteLoopExitValues
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition:LoopUtils.cpp:1549
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition:STLExtras.h:1903
llvm::scAddRecExpr
@ scAddRecExpr
Definition:ScalarEvolutionExpressions.h:48
llvm::scAddExpr
@ scAddExpr
Definition:ScalarEvolutionExpressions.h:45
llvm::scVScale
@ scVScale
Definition:ScalarEvolutionExpressions.h:41
llvm::scUnknown
@ scUnknown
Definition:ScalarEvolutionExpressions.h:55
llvm::scConstant
@ scConstant
Definition:ScalarEvolutionExpressions.h:40
llvm::scSignExtend
@ scSignExtend
Definition:ScalarEvolutionExpressions.h:44
llvm::scTruncate
@ scTruncate
Definition:ScalarEvolutionExpressions.h:42
llvm::scZeroExtend
@ scZeroExtend
Definition:ScalarEvolutionExpressions.h:43
llvm::scMulExpr
@ scMulExpr
Definition:ScalarEvolutionExpressions.h:46
llvm::UnusedIndVarInLoop
@ UnusedIndVarInLoop
Definition:LoopUtils.h:482
llvm::InlinerFunctionImportStatsOpts::Basic
@ Basic
llvm::filterDbgVars
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Definition:DebugProgramInstruction.h:555
llvm::hash_combine_range
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition:Hashing.h:468
llvm::SCEVExprContains
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Definition:ScalarEvolutionExpressions.h:720
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860
raw_ostream.h
N
#define N
WorkItem
Definition:WinEHPrepare.cpp:235
llvm::CriticalEdgeSplittingOptions
Option class for critical edge splitting.
Definition:BasicBlockUtils.h:145
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition:LoopAnalysisManager.h:53
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition:LoopAnalysisManager.h:58
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition:LoopAnalysisManager.h:63
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition:LoopAnalysisManager.h:60
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition:LoopAnalysisManager.h:55
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition:LoopAnalysisManager.h:59
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition:LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition:LoopAnalysisManager.h:56
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition:TargetTransformInfo.h:71
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition:TargetTransformInfo.h:76
llvm::TargetTransformInfo::LSRCost
Definition:TargetTransformInfo.h:522
llvm::cl::desc
Definition:CommandLine.h:409
