1//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This transformation analyzes and transforms the induction variables (and 10// computations derived from them) into forms suitable for efficient execution 13// This pass performs a strength reduction on array references inside loops that 14// have as one or more of their components the loop induction variable, it 15// rewrites expressions to take advantage of scaled-index addressing modes 16// available on the target, and it performs a variety of other optimizations 17// related to loop induction variables. 19// Terminology note: this code has a lot of handling for "post-increment" or 20// "post-inc" users. This is not talking about post-increment addressing modes; 21// it is instead talking about code like this: 23// %i = phi [ 0, %entry ], [ %i.next, %latch ] 26// %c = icmp eq %i.next, %n 28// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however 29// it's useful to think about these as the same register, with some uses using 30// the value of the register before the add and some using it after. In this 31// example, the icmp is a post-increment user, since it uses %i.next, which is 32// the value of the induction variable after the increment. The other common 33// case of post-increment users is users outside the loop. 35// TODO: More sophistication in the way Formulae are generated and filtered. 37// TODO: Handle multiple loops at a time. 39// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead 42// TODO: When truncation is free, truncate ICmp users' operands to make it a 43// smaller encoding (on x86 at least). 45// TODO: When a negated register is used by an add (such as in a list of 46// multiple base registers, or as the increment expression in an addrec), 47// we may not actually need both reg and (-1 * reg) in registers; the 48// negation can be implemented by using a sub instead of an add. The 49// lack of support for taking this into consideration when making 50// register pressure decisions is partly worked around by the "Special" 53//===----------------------------------------------------------------------===// 84#include "llvm/Config/llvm-config.h" 132#define DEBUG_TYPE "loop-reduce" 134/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for 135/// bail out. This threshold is far beyond the number of users that LSR can 136/// conceivably solve, so it should not affect generated code, but catches the 137/// worst cases before LSR burns too much compile time and stack space. 140/// Limit the size of expression that SCEV-based salvaging will attempt to 141/// translate into a DIExpression. 142/// Choose a maximum size such that debuginfo is not excessively increased and 143/// the salvaging is not too expensive for the compiler. 146// Cleanup congruent phis after LSR phi expansion. 149cl::desc(
"Enable LSR phi elimination"));
151// The flag adds instruction count to solutions cost comparison. 154cl::desc(
"Add instruction count to a LSR cost model"));
156// Flag to choose how to narrow complex lsr solution 159cl::desc(
"Narrow LSR complex solution using" 160" expectation of registers number"));
162// Flag to narrow search space by filtering non-optimal formulae with 163// the same ScaledReg and Scale. 166cl::desc(
"Narrow LSR search space by filtering non-optimal formulae" 167" with the same ScaledReg and Scale"));
171cl::desc(
"A flag that overrides the target's preferred addressing mode."),
174"Don't prefer any addressing mode"),
177"Prefer pre-indexed addressing mode"),
180"Prefer post-indexed addressing mode")));
184cl::init(std::numeric_limits<uint16_t>::max()),
185cl::desc(
"LSR search space complexity limit"));
189cl::desc(
"The limit on recursion depth for LSRs setup cost"));
193cl::desc(
"Attempt to drop solution if it is less profitable"));
197cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
201cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
204// Stress test IV chain generation. 207cl::desc(
"Stress test LSR IV chains"));
215 /// Used in situations where the accessed memory type is unknown. 216staticconstunsigned UnknownAddressSpace =
217 std::numeric_limits<unsigned>::max();
220unsigned AddrSpace = UnknownAddressSpace;
222 MemAccessTy() =
default;
223 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
226return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
232unsigned AS = UnknownAddressSpace) {
239/// This class holds data which is used to order reuse candidates. 242 /// This represents the set of LSRUse indices which reference 243 /// a particular register. 250// An offset from an address that is either scalable or fixed. Used for 251// per-target optimizations of addressing modes. 253constexpr Immediate(ScalarTy MinVal,
bool Scalable)
254 : FixedOrScalableQuantity(MinVal, Scalable) {}
256constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
257 : FixedOrScalableQuantity(
V) {}
260constexpr Immediate() =
delete;
262staticconstexpr Immediate getFixed(ScalarTy MinVal) {
263return {MinVal,
false};
265staticconstexpr Immediate getScalable(ScalarTy MinVal) {
268staticconstexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
269return {MinVal, Scalable};
271staticconstexpr Immediate getZero() {
return {0,
false}; }
272staticconstexpr Immediate getFixedMin() {
273return {std::numeric_limits<int64_t>::min(),
false};
275staticconstexpr Immediate getFixedMax() {
276return {std::numeric_limits<int64_t>::max(),
false};
278staticconstexpr Immediate getScalableMin() {
279return {std::numeric_limits<int64_t>::min(),
true};
281staticconstexpr Immediate getScalableMax() {
282return {std::numeric_limits<int64_t>::max(),
true};
285constexprbool isLessThanZero()
const{
return Quantity < 0; }
287constexprbool isGreaterThanZero()
const{
return Quantity > 0; }
289constexprbool isCompatibleImmediate(
const Immediate &Imm)
const{
290returnisZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
293constexprbool isMin()
const{
294return Quantity == std::numeric_limits<ScalarTy>::min();
297constexprbool isMax()
const{
298return Quantity == std::numeric_limits<ScalarTy>::max();
301// Arithmetic 'operators' that cast to unsigned types first. 302constexpr Immediate addUnsigned(
const Immediate &RHS)
const{
303assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
305return {
Value, Scalable ||
RHS.isScalable()};
308constexpr Immediate subUnsigned(
const Immediate &RHS)
const{
309assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
311return {
Value, Scalable ||
RHS.isScalable()};
314// Scale the quantity by a constant without caring about runtime scalability. 315constexpr Immediate mulUnsigned(
const ScalarTy RHS)
const{
317return {
Value, Scalable};
320// Helpers for generating SCEVs with vscale terms where needed. 343// This is needed for the Compare type of std::map when Immediate is used 344// as a key. We don't need it to be fully correct against any value of vscale, 345// just to make sure that vscale-related terms in the map are considered against 346// each other rather than being mixed up and potentially missing opportunities. 347structKeyOrderTargetImmediate {
348bool operator()(
const Immediate &LHS,
const Immediate &RHS)
const{
349if (
LHS.isScalable() && !
RHS.isScalable())
351if (!
LHS.isScalable() &&
RHS.isScalable())
353returnLHS.getKnownMinValue() <
RHS.getKnownMinValue();
357// This would be nicer if we could be generic instead of directly using size_t, 358// but there doesn't seem to be a type trait for is_orderable or 359// is_lessthan_comparable or similar. 360structKeyOrderSizeTAndImmediate {
361bool operator()(
const std::pair<size_t, Immediate> &LHS,
362const std::pair<size_t, Immediate> &RHS)
const{
363size_t LSize =
LHS.first;
364size_t RSize =
RHS.first;
367return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
370}
// end anonymous namespace 372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 374OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
384/// Map register candidates to information about how they are used. 388 RegUsesTy RegUsesMap;
392void countRegister(
constSCEV *Reg,
size_t LUIdx);
393void dropRegister(
constSCEV *Reg,
size_t LUIdx);
394void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
396bool isRegUsedByUsesOtherThan(
constSCEV *Reg,
size_t LUIdx)
const;
411}
// end anonymous namespace 414RegUseTracker::countRegister(
constSCEV *Reg,
size_t LUIdx) {
415 std::pair<RegUsesTy::iterator, bool> Pair =
416 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
421 RSD.UsedByIndices.set(LUIdx);
425RegUseTracker::dropRegister(
constSCEV *Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(Reg);
427assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
429assert(RSD.UsedByIndices.size() > LUIdx);
430 RSD.UsedByIndices.reset(LUIdx);
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435assert(LUIdx <= LastLUIdx);
437// Update RegUses. The data structure is not optimized for this purpose; 438// we must iterate through it and update each of the bit vectors. 439for (
auto &Pair : RegUsesMap) {
441if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
449RegUseTracker::isRegUsedByUsesOtherThan(
constSCEV *Reg,
size_t LUIdx)
const{
450 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
451if (
I == RegUsesMap.end())
455if (i == -1)
returnfalse;
456if ((
size_t)i != LUIdx)
returntrue;
461 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
462assert(
I != RegUsesMap.end() &&
"Unknown register!");
463returnI->second.UsedByIndices;
466void RegUseTracker::clear() {
473/// This class holds information that describes a formula for computing 474/// satisfying a use. It may include broken-out immediates and scaled registers. 476 /// Global base address used for complex addressing. 479 /// Base offset for complex addressing. 480 Immediate BaseOffset = Immediate::getZero();
482 /// Whether any complex addressing has a base register. 483bool HasBaseReg =
false;
485 /// The scale of any complex addressing. 488 /// The list of "base" registers for this use. When this is non-empty. The 489 /// canonical representation of a formula is 490 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and 491 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). 492 /// 3. The reg containing recurrent expr related with currect loop in the 493 /// formula should be put in the ScaledReg. 494 /// #1 enforces that the scaled register is always used when at least two 495 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. 496 /// #2 enforces that 1 * reg is reg. 497 /// #3 ensures invariant regs with respect to current loop can be combined 498 /// together in LSR codegen. 499 /// This invariant can be temporarily broken while building a formula. 500 /// However, every formula inserted into the LSRInstance must be in canonical 504 /// The 'scaled' register for this use. This should be non-null when Scale is 506constSCEV *ScaledReg =
nullptr;
508 /// An additional constant offset which added near the use. This requires a 509 /// temporary register, but the offset itself can live in an add immediate 510 /// field rather than a register. 511 Immediate UnfoldedOffset = Immediate::getZero();
519void canonicalize(
constLoop &L);
523bool hasZeroEnd()
const;
525size_t getNumRegs()
const;
528void deleteBaseReg(
constSCEV *&S);
530bool referencesReg(
constSCEV *S)
const;
531bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
532const RegUseTracker &RegUses)
const;
538}
// end anonymous namespace 540/// Recursion helper for initialMatch. 545// Collect expressions which properly dominate the loop header. 551// Look at add operands. 553for (
constSCEV *S :
Add->operands())
558// Look at addrec operands. 560if (!AR->getStart()->isZero() && AR->isAffine()) {
563 AR->getStepRecurrence(SE),
564// FIXME: AR->getNoWrapFlags() 570// Handle a multiplication by -1 (negation) if it didn't fold. 579constSCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
581for (
constSCEV *S : MyGood)
583for (
constSCEV *S : MyBad)
588// Ok, we can't do anything interesting. Just stuff the whole thing into a 589// register and hope for the best. 593/// Incorporate loop-variant parts of S into this Formula, attempting to keep 594/// all loop-invariant and loop-computable values in a single base register. 602 BaseRegs.push_back(Sum);
608 BaseRegs.push_back(Sum);
616return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
620/// Check whether or not this formula satisfies the canonical 622/// \see Formula::BaseRegs. 623bool Formula::isCanonical(
constLoop &L)
const{
624assert((Scale == 0 || ScaledReg) &&
625"ScaledReg must be non-null if Scale is non-zero");
628return BaseRegs.size() <= 1;
633if (Scale == 1 && BaseRegs.empty())
639// If ScaledReg is not a recurrent expr, or it is but its loop is not current 640// loop, meanwhile BaseRegs contains a recurrent expr reg related with current 641// loop, we want to swap the reg in BaseRegs with ScaledReg. 647/// Helper method to morph a formula into its canonical representation. 648/// \see Formula::BaseRegs. 649/// Every formula having more than one base register, must use the ScaledReg 650/// field. Otherwise, we would have to do special cases everywhere in LSR 651/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... 652/// On the other hand, 1*reg should be canonicalized into reg. 653void Formula::canonicalize(
constLoop &L) {
657if (BaseRegs.empty()) {
658// No base reg? Use scale reg with scale = 1 as such. 659assert(ScaledReg &&
"Expected 1*reg => reg");
660assert(Scale == 1 &&
"Expected 1*reg => reg");
661 BaseRegs.push_back(ScaledReg);
667// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. 669 ScaledReg = BaseRegs.pop_back_val();
673// If ScaledReg is an invariant with respect to L, find the reg from 674// BaseRegs containing the recurrent expr related with Loop L. Swap the 675// reg with ScaledReg. 680if (
I != BaseRegs.end())
686/// Get rid of the scale in the formula. 687/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. 688/// \return true if it was possible to get rid of the scale, false otherwise. 689/// \note After this operation the formula may not be in the canonical form. 690bool Formula::unscale() {
694 BaseRegs.push_back(ScaledReg);
699bool Formula::hasZeroEnd()
const{
700if (UnfoldedOffset || BaseOffset)
702if (BaseRegs.size() != 1 || ScaledReg)
707/// Return the total number of register operands used by this formula. This does 708/// not include register uses implied by non-constant addrec strides. 709size_t Formula::getNumRegs()
const{
710return !!ScaledReg + BaseRegs.size();
713/// Return the type of this formula, if it has one, or null otherwise. This type 714/// is meaningless except for the bit size. 715Type *Formula::getType()
const{
716return !BaseRegs.empty() ? BaseRegs.front()->getType() :
717 ScaledReg ? ScaledReg->getType() :
718 BaseGV ? BaseGV->getType() :
722/// Delete the given base reg from the BaseRegs list. 723void Formula::deleteBaseReg(
constSCEV *&S) {
724if (&S != &BaseRegs.back())
729/// Test if this formula references the given register. 730bool Formula::referencesReg(
constSCEV *S)
const{
734/// Test whether this formula uses registers which are used by uses other than 735/// the use with the given index. 736bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
737const RegUseTracker &RegUses)
const{
739if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
741for (
constSCEV *BaseReg : BaseRegs)
742if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 752 BaseGV->printAsOperand(
OS,
/*PrintType=*/false);
754if (BaseOffset.isNonZero()) {
758for (
constSCEV *BaseReg : BaseRegs) {
760OS <<
"reg(" << *BaseReg <<
')';
762if (HasBaseReg && BaseRegs.empty()) {
764OS <<
"**error: HasBaseReg**";
765 }
elseif (!HasBaseReg && !BaseRegs.empty()) {
767OS <<
"**error: !HasBaseReg**";
771OS << Scale <<
"*reg(";
778if (UnfoldedOffset.isNonZero()) {
780OS <<
"imm(" << UnfoldedOffset <<
')';
789/// Return true if the given addrec can be sign-extended without changing its 797/// Return true if the given add can be sign-extended without changing its 805/// Return true if the given mul can be sign-extended without changing its 814/// Return an expression for LHS /s RHS, if it can be determined and if the 815/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits 816/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that 817/// the multiplication may overflow, which is useful when the result will be 818/// used in a context where the most significant bits are ignored. 821bool IgnoreSignificantBits =
false) {
822// Handle the trivial case, which works for any SCEV type. 826// Handle a few RHS special cases. 830// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do 837// Handle x /s 1 as x. 842// Check for a division of a constant by a constant. 846constAPInt &LA =
C->getAPInt();
853// Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 857 IgnoreSignificantBits);
858if (!Step)
returnnullptr;
860 IgnoreSignificantBits);
861if (!Start)
returnnullptr;
862// FlagNW is independent of the start value, step direction, and is 863// preserved with smaller magnitude steps. 864// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 870// Distribute the sdiv over add operands, if the add doesn't overflow. 874for (
constSCEV *S :
Add->operands()) {
876if (!
Op)
returnnullptr;
884// Check for a multiply operand that we can pull RHS out of. 887// Handle special case C1*X*Y /s C2*X*Y. 892 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
907 IgnoreSignificantBits)) {
918// Otherwise we don't know. 922/// If S involves the addition of a constant integer value, return that integer 923/// value, and mutate S to point to a new SCEV with that value excluded. 926if (
C->getAPInt().getSignificantBits() <= 64) {
928return Immediate::getFixed(
C->getValue()->getSExtValue());
933if (Result.isNonZero())
939if (Result.isNonZero())
941// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 944 }
elseif (
constSCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
946if (
constSCEVConstant *
C = dyn_cast<SCEVConstant>(M->getOperand(0)))
947if (isa<SCEVVScale>(M->getOperand(1))) {
949return Immediate::getScalable(
C->getValue()->getSExtValue());
953return Immediate::getZero();
956/// If S involves the addition of a GlobalValue address, return that symbol, and 957/// mutate S to point to a new SCEV with that value excluded. 959if (
constSCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
960if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
975// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 982/// Returns true if the specified instruction is using the specified value as an 986bool isAddress = isa<LoadInst>(Inst);
987if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
988if (SI->getPointerOperand() == OperandVal)
991// Addressing modes can also be folded into prefetches and a variety 993switch (
II->getIntrinsicID()) {
994case Intrinsic::memset:
995case Intrinsic::prefetch:
996case Intrinsic::masked_load:
997if (
II->getArgOperand(0) == OperandVal)
1000case Intrinsic::masked_store:
1001if (
II->getArgOperand(1) == OperandVal)
1004case Intrinsic::memmove:
1005case Intrinsic::memcpy:
1006if (
II->getArgOperand(0) == OperandVal ||
1007II->getArgOperand(1) == OperandVal)
1013if (IntrInfo.
PtrVal == OperandVal)
1018 }
elseif (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1019if (RMW->getPointerOperand() == OperandVal)
1022if (CmpX->getPointerOperand() == OperandVal)
1028/// Return the type of the memory being accessed. 1031 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1033// First get the type of memory being accessed. 1035 AccessTy.MemTy = Ty;
1037// Then get the pointer address space. 1038if (
constStoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1039 AccessTy.AddrSpace = SI->getPointerAddressSpace();
1040 }
elseif (
constLoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1041 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1042 }
elseif (
constAtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1043 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1045 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1047switch (
II->getIntrinsicID()) {
1048case Intrinsic::prefetch:
1049case Intrinsic::memset:
1050 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1051 AccessTy.MemTy = OperandVal->
getType();
1053case Intrinsic::memmove:
1054case Intrinsic::memcpy:
1056 AccessTy.MemTy = OperandVal->
getType();
1058case Intrinsic::masked_load:
1059 AccessTy.AddrSpace =
1060II->getArgOperand(0)->getType()->getPointerAddressSpace();
1062case Intrinsic::masked_store:
1063 AccessTy.AddrSpace =
1064II->getArgOperand(1)->getType()->getPointerAddressSpace();
1081/// Return true if this AddRec is already a phi in its loop. 1093/// Check if expanding this expression is likely to incur significant cost. This 1094/// is tricky because SCEV doesn't track which expressions are actually computed 1095/// by the current IR. 1097/// We currently allow expansion of IV increments that involve adds, 1098/// multiplication by constants, and AddRecs from existing phis. 1100/// TODO: Allow UDivExpr if we can find an existing IV increment that is an 1101/// obvious multiple of the UDivExpr. 1105// Zero/One operand expressions 1124if (!Processed.
insert(S).second)
1128for (
constSCEV *S :
Add->operands()) {
1137// Multiplication by a constant is ok 1141// If we have the value of one operand, check if an existing 1142// multiplication already generates this expression. 1144Value *UVal = U->getValue();
1146// If U is a constant, it may be used by a ConstantExpr. 1148if (UI && UI->
getOpcode() == Instruction::Mul &&
1162// Fow now, consider any other type of expression (div/mul/min/max) high cost. 1170}
// end anonymous namespace 1172/// Check if the addressing mode defined by \p F is completely 1173/// folded in \p LU at isel time. 1174/// This includes address-mode folding and special icmp tricks. 1175/// This function returns true if \p LU can accommodate what \p F 1176/// defines and up to 1 base + 1 scaled + offset. 1177/// In other words, if \p F has several base registers, this function may 1178/// still return true. Therefore, users still need to account for 1179/// additional base registers and/or unfolded offsets to derive an 1180/// accurate cost model. 1182const LSRUse &LU,
const Formula &
F);
1184// Get the cost of the scaling factor used in F for LU. 1186const LSRUse &LU,
const Formula &
F,
1191/// This class is used to measure and compare candidate formulae. 1193constLoop *
L =
nullptr;
1219// Once any of the metrics loses, they must all remain losers. 1221return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1222 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1223 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1224 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1230returnC.NumRegs == ~0
u;
1233void RateFormula(
const Formula &
F,
1243void RateRegister(
const Formula &
F,
constSCEV *
Reg,
1245void RatePrimaryRegister(
const Formula &
F,
constSCEV *
Reg,
1250/// An operand value in an instruction which is to be replaced with some 1251/// equivalent, possibly strength-reduced, replacement. 1253 /// The instruction which will be updated. 1256 /// The operand of the instruction which will be replaced. The operand may be 1257 /// used more than once; every instance will be replaced. 1258Value *OperandValToReplace =
nullptr;
1260 /// If this user is to use the post-incremented value of an induction 1261 /// variable, this set is non-empty and holds the loops associated with the 1262 /// induction variable. 1265 /// A constant offset to be added to the LSRUse expression. This allows 1266 /// multiple fixups to share the same LSRUse with different offsets, for 1267 /// example in an unrolled loop. 1268 Immediate
Offset = Immediate::getZero();
1270 LSRFixup() =
default;
1272bool isUseFullyOutsideLoop(
constLoop *L)
const;
1278/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted 1279/// SmallVectors of const SCEV*. 1280structUniquifierDenseMapInfo {
1283V.push_back(
reinterpret_cast<constSCEV *
>(-1));
1289V.push_back(
reinterpret_cast<constSCEV *
>(-2));
1303/// This class holds the state that LSR keeps for each use in IVUsers, as well 1304/// as uses invented by LSR itself. It includes information about what kinds of 1305/// things can be folded into the user, information about the user itself, and 1306/// information about how the use may be satisfied. TODO: Represent multiple 1307/// users of the same expression in common? 1312 /// An enum for a kind of use, indicating what types of scaled and immediate 1313 /// operands it might support. 1315Basic,
///< A normal use, with no folding. 1316 Special,
///< A special case of basic, allowing -1 scales. 1317Address,
///< An address use; folding according to TargetLowering 1318 ICmpZero
///< An equality icmp with both operands folded into one. 1319// TODO: Add a generic icmp too? 1325 MemAccessTy AccessTy;
1327 /// The list of operands which are to be replaced. 1330 /// Keep track of the min and max offsets of the fixups. 1331 Immediate MinOffset = Immediate::getFixedMax();
1332 Immediate MaxOffset = Immediate::getFixedMin();
1334 /// This records whether all of the fixups using this LSRUse are outside of 1335 /// the loop, in which case some special-case heuristics may be used. 1336bool AllFixupsOutsideLoop =
true;
1338 /// RigidFormula is set to true to guarantee that this use will be associated 1339 /// with a single formula--the one that initially matched. Some SCEV 1340 /// expressions cannot be expanded. This allows LSR to consider the registers 1341 /// used by those expressions without the need to expand them later after 1342 /// changing the formula. 1343bool RigidFormula =
false;
1345 /// This records the widest use type for any fixup using this 1346 /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max 1347 /// fixup widths to be equivalent, because the narrower one may be relying on 1348 /// the implicit truncation to truncate away bogus bits. 1349Type *WidestFixupType =
nullptr;
1351 /// A list of ways to build a value that can satisfy this user. After the 1352 /// list is populated, one of these is selected heuristically and used to 1353 /// formulate a replacement for OperandValToReplace in UserInst. 1356 /// The set of register candidates used by all formulae in this LSRUse. 1359 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1361 LSRFixup &getNewFixup() {
1362Fixups.push_back(LSRFixup());
1366void pushFixup(LSRFixup &f) {
1368if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1369 MaxOffset =
f.Offset;
1370if (Immediate::isKnownLT(
f.Offset, MinOffset))
1371 MinOffset =
f.Offset;
1374bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1375float getNotSelectedProbability(
constSCEV *Reg)
const;
1376bool InsertFormula(
const Formula &
F,
constLoop &L);
1377void DeleteFormula(Formula &
F);
1378void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1384}
// end anonymous namespace 1387 LSRUse::KindType Kind, MemAccessTy AccessTy,
1389bool HasBaseReg, int64_t Scale,
1393if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1397if (
constauto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1399if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1401if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1403 [&](
unsigned i,
constSCEV *Reg) {
1404 return i + getSetupCost(Reg, Depth - 1);
1406if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1412/// Tally up interesting quantities from the given register. 1413void Cost::RateRegister(
const Formula &
F,
constSCEV *Reg,
1416// If this is an addrec for another loop, it should be an invariant 1417// with respect to L since L is the innermost loop (at least 1418// for now LSR only handles innermost loops). 1419if (AR->getLoop() != L) {
1420// If the AddRec exists, consider it's register free and leave it alone. 1424// It is bad to allow LSR for current loop to add induction variables 1425// for its sibling loops. 1426if (!AR->getLoop()->contains(L)) {
1431// Otherwise, it will be an invariant with respect to Loop L. 1436unsigned LoopCost = 1;
1440// If the step size matches the base offset, we could use pre-indexed 1443if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1444if (Step->getAPInt() ==
F.BaseOffset.getFixedValue())
1447constSCEV *LoopStep = AR->getStepRecurrence(*SE);
1448if (isa<SCEVConstant>(LoopStep)) {
1449constSCEV *LoopStart = AR->getStart();
1450if (!isa<SCEVConstant>(LoopStart) &&
1456C.AddRecCost += LoopCost;
1458// Add the step value register, if it needs one. 1459// TODO: The non-affine case isn't precisely modeled here. 1460if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1461if (!Regs.
count(AR->getOperand(1))) {
1462 RateRegister(
F, AR->getOperand(1), Regs);
1470// Rough heuristic; favor registers which don't require extra setup 1471// instructions in the preheader. 1473// Ensure we don't, even with the recusion limit, produce invalid costs. 1474C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1476C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1480/// Record this register in the set. If we haven't seen it before, rate 1481/// it. Optional LoserRegs provides a way to declare any formula that refers to 1482/// one of those regs an instant loser. 1483void Cost::RatePrimaryRegister(
const Formula &
F,
constSCEV *Reg,
1486if (LoserRegs && LoserRegs->
count(Reg)) {
1490if (Regs.
insert(Reg).second) {
1491 RateRegister(
F, Reg, Regs);
1492if (LoserRegs && isLoser())
1497void Cost::RateFormula(
const Formula &
F,
1504assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1505// Tally up the registers. 1506unsigned PrevAddRecCost =
C.AddRecCost;
1507unsigned PrevNumRegs =
C.NumRegs;
1508unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1509if (
constSCEV *ScaledReg =
F.ScaledReg) {
1510if (VisitedRegs.
count(ScaledReg)) {
1514 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1518for (
constSCEV *BaseReg :
F.BaseRegs) {
1519if (VisitedRegs.
count(BaseReg)) {
1523 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1528// Determine how many (unfolded) adds we'll need inside the loop. 1529size_t NumBaseParts =
F.getNumRegs();
1530if (NumBaseParts > 1)
1531// Do not count the base and a possible second register if the target 1532// allows to fold 2 registers. 1535C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1537// Accumulate non-free scaling amounts. 1540// Tally up the non-zero immediates. 1541for (
const LSRFixup &
Fixup : LU.Fixups) {
1542if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1543 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1545C.ImmCost += 64;
// Handle symbolic values conservatively. 1546// TODO: This should probably be the pointer size. 1547elseif (
Offset.isNonZero())
1551// Check with target if this offset with this instruction is 1552// specifically not supported. 1553if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1558// Incompatible immediate type, increase cost to avoid using 1563// If we don't count instruction cost exit here. 1569// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as 1570// additional instruction (at least fill). 1571// TODO: Need distinguish register class? 1574if (
C.NumRegs > TTIRegNum) {
1575// Cost already exceeded TTIRegNum, then only newly added register can add 1577if (PrevNumRegs > TTIRegNum)
1578C.Insns += (
C.NumRegs - PrevNumRegs);
1580C.Insns += (
C.NumRegs - TTIRegNum);
1583// If ICmpZero formula ends with not 0, it could not be replaced by 1584// just add or sub. We'll need to compare final result of AddRec. 1585// That means we'll need an additional instruction. But if the target can 1586// macro-fuse a compare with a branch, don't count this extra instruction. 1587// For -10 + {0, +, 1}: 1593if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1596// Each new AddRec adds 1 instruction to calculation. 1597C.Insns += (
C.AddRecCost - PrevAddRecCost);
1599// BaseAdds adds instructions for unfolded registers. 1600if (LU.Kind != LSRUse::ICmpZero)
1601C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1605/// Set this cost to a losing value. 1607C.Insns = std::numeric_limits<unsigned>::max();
1608C.NumRegs = std::numeric_limits<unsigned>::max();
1609C.AddRecCost = std::numeric_limits<unsigned>::max();
1610C.NumIVMuls = std::numeric_limits<unsigned>::max();
1611C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1612C.ImmCost = std::numeric_limits<unsigned>::max();
1613C.SetupCost = std::numeric_limits<unsigned>::max();
1614C.ScaleCost = std::numeric_limits<unsigned>::max();
1617/// Choose the lower cost. 1618bool Cost::isLess(
constCost &
Other)
const{
1621returnC.Insns <
Other.C.Insns;
1625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1628OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1629OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1630if (
C.AddRecCost != 0)
1631OS <<
", with addrec cost " <<
C.AddRecCost;
1632if (
C.NumIVMuls != 0)
1633OS <<
", plus " <<
C.NumIVMuls <<
" IV mul" 1634 << (
C.NumIVMuls == 1 ?
"" :
"s");
1635if (
C.NumBaseAdds != 0)
1636OS <<
", plus " <<
C.NumBaseAdds <<
" base add" 1637 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1638if (
C.ScaleCost != 0)
1639OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1641OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1642if (
C.SetupCost != 0)
1643OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1651/// Test whether this fixup always uses its value outside of the given loop. 1652bool LSRFixup::isUseFullyOutsideLoop(
constLoop *L)
const{
1653// PHI nodes use their value in their incoming blocks. 1654if (
constPHINode *PN = dyn_cast<PHINode>(UserInst)) {
1655for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1656if (PN->getIncomingValue(i) == OperandValToReplace &&
1657L->contains(PN->getIncomingBlock(i)))
1662return !
L->contains(UserInst);
1665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1668// Store is common and interesting enough to be worth special-casing. 1669if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1671Store->getOperand(0)->printAsOperand(
OS,
/*PrintType=*/false);
1672 }
elseif (UserInst->getType()->isVoidTy())
1673OS << UserInst->getOpcodeName();
1675 UserInst->printAsOperand(
OS,
/*PrintType=*/false);
1677OS <<
", OperandValToReplace=";
1678 OperandValToReplace->printAsOperand(
OS,
/*PrintType=*/false);
1680for (
constLoop *PIL : PostIncLoops) {
1681OS <<
", PostIncLoop=";
1682 PIL->getHeader()->printAsOperand(
OS,
/*PrintType=*/false);
1694/// Test whether this use as a formula which has the same registers as the given 1696bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const{
1698if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1699// Unstable sort by host order ok, because this is only used for uniquifying. 1701return Uniquifier.
count(Key);
1704/// The function returns a probability of selecting formula without Reg. 1705float LSRUse::getNotSelectedProbability(
constSCEV *Reg)
const{
1707for (
const Formula &
F : Formulae)
1708if (
F.referencesReg(Reg))
1710return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1713/// If the given formula has not yet been inserted, add it to the list, and 1714/// return true. Return false otherwise. The formula must be in canonical form. 1715bool LSRUse::InsertFormula(
const Formula &
F,
constLoop &L) {
1716assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1718if (!Formulae.empty() && RigidFormula)
1722if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1723// Unstable sort by host order ok, because this is only used for uniquifying. 1726if (!Uniquifier.
insert(Key).second)
1729// Using a register to hold the value of 0 is not profitable. 1730assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1731"Zero allocated in a scaled register!");
1733for (
constSCEV *BaseReg :
F.BaseRegs)
1734assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1737// Add the formula to the list. 1738 Formulae.push_back(
F);
1740// Record registers now being used by this use. 1741 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1748/// Remove the given formula from this use's list. 1749void LSRUse::DeleteFormula(Formula &
F) {
1750if (&
F != &Formulae.back())
1752 Formulae.pop_back();
1755/// Recompute the Regs field, and update RegUses. 1756void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1757// Now that we've filtered out some formulae, recompute the Regs set. 1760for (
const Formula &
F : Formulae) {
1761if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1762 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1765// Update the RegTracker. 1766for (
constSCEV *S : OldRegs)
1768 RegUses.dropRegister(S, LUIdx);
1771#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1773OS <<
"LSR Use: Kind=";
1776case Special:
OS <<
"Special";
break;
1777case ICmpZero:
OS <<
"ICmpZero";
break;
1780if (AccessTy.MemTy->isPointerTy())
1781OS <<
"pointer";
// the full pointer type could be really verbose 1783OS << *AccessTy.MemTy;
1786OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1790bool NeedComma =
false;
1791for (
const LSRFixup &
Fixup : Fixups) {
1792if (NeedComma)
OS <<
',';
1798if (AllFixupsOutsideLoop)
1799OS <<
", all-fixups-outside-loop";
1802OS <<
", widest fixup type: " << *WidestFixupType;
1811 LSRUse::KindType Kind, MemAccessTy AccessTy,
1813bool HasBaseReg, int64_t Scale,
1816case LSRUse::Address: {
1817 int64_t FixedOffset =
1818 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1819 int64_t ScalableOffset =
1820 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1822 HasBaseReg, Scale, AccessTy.AddrSpace,
1823Fixup, ScalableOffset);
1825case LSRUse::ICmpZero:
1826// There's not even a target hook for querying whether it would be legal to 1827// fold a GV into an ICmp. 1831// ICmp only has two operands; don't allow more than two non-trivial parts. 1832if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1835// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by 1836// putting the scaled register in the other operand of the icmp. 1837if (Scale != 0 && Scale != -1)
1840// If we have low-level target information, ask the target if it can fold an 1841// integer immediate on an icmp. 1842if (BaseOffset.isNonZero()) {
1843// We don't have an interface to query whether the target supports 1844// icmpzero against scalable quantities yet. 1845if (BaseOffset.isScalable())
1849// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset 1850// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset 1851// Offs is the ICmp immediate. 1853// The cast does the right thing with 1854// std::numeric_limits<int64_t>::min(). 1855 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1859// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg 1863// Only handle single-register values. 1864return !BaseGV && Scale == 0 && BaseOffset.isZero();
1866case LSRUse::Special:
1867// Special case Basic to handle -1 scales. 1868return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1875 Immediate MinOffset, Immediate MaxOffset,
1876 LSRUse::KindType Kind, MemAccessTy AccessTy,
1878bool HasBaseReg, int64_t Scale) {
1879if (BaseOffset.isNonZero() &&
1880 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1881 BaseOffset.isScalable() != MaxOffset.isScalable()))
1883// Check for overflow. 1884 int64_t
Base = BaseOffset.getKnownMinValue();
1885 int64_t Min = MinOffset.getKnownMinValue();
1886 int64_t Max = MaxOffset.getKnownMinValue();
1889 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1892 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1895 HasBaseReg, Scale) &&
1901 Immediate MinOffset, Immediate MaxOffset,
1902 LSRUse::KindType Kind, MemAccessTy AccessTy,
1903const Formula &
F,
constLoop &L) {
1904// For the purpose of isAMCompletelyFolded either having a canonical formula 1905// or a scale not equal to zero is correct. 1906// Problems may arise from non canonical formulae having a scale == 0. 1907// Strictly speaking it would best to just rely on canonical formulae. 1908// However, when we generate the scaled formulae, we first check that the 1909// scaling factor is profitable before computing the actual ScaledReg for 1910// compile time sake. 1911assert((
F.isCanonical(L) ||
F.Scale != 0));
1913F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1916/// Test whether we know how to expand the current formula. 1918 Immediate MaxOffset, LSRUse::KindType Kind,
1920 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1921// We know how to expand completely foldable formulae. 1923 BaseOffset, HasBaseReg, Scale) ||
1924// Or formulae that use a base register produced by a sum of base 1928 BaseGV, BaseOffset,
true, 0));
1932 Immediate MaxOffset, LSRUse::KindType Kind,
1933 MemAccessTy AccessTy,
const Formula &
F) {
1934returnisLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1935F.BaseOffset,
F.HasBaseReg,
F.Scale);
1947const LSRUse &LU,
const Formula &
F) {
1948// Target may want to look at the user instructions. 1950for (
const LSRFixup &
Fixup : LU.Fixups)
1952 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1959 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1964const LSRUse &LU,
const Formula &
F,
1969// If the use is not completely folded in that instruction, we will have to 1970// pay an extra cost only for scale != 1. 1976case LSRUse::Address: {
1977// Check the scaling factor cost with both the min and max offsets. 1978 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1979if (
F.BaseOffset.isScalable()) {
1980 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1981 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1983 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1984 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1988F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1991F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1994"Legal addressing mode has an illegal cost!");
1995return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1997case LSRUse::ICmpZero:
1999case LSRUse::Special:
2000// The use is completely folded, i.e., everything is folded into the 2009 LSRUse::KindType Kind, MemAccessTy AccessTy,
2012// Fast-path: zero is always foldable. 2013if (BaseOffset.isZero() && !BaseGV)
2016// Conservatively, create an address with an immediate and a 2018 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2020// Canonicalize a scale of 1 to a base register if the formula doesn't 2021// already have a base register. 2022if (!HasBaseReg && Scale == 1) {
2027// FIXME: Try with + without a scale? Maybe based on TTI? 2028// I think basereg + scaledreg + immediateoffset isn't a good 'conservative' 2029// default for many architectures, not just AArch64 SVE. More investigation 2030// needed later to determine if this should be used more widely than just 2031// on scalable types. 2032if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2042 Immediate MaxOffset, LSRUse::KindType Kind,
2043 MemAccessTy AccessTy,
constSCEV *S,
2045// Fast-path: zero is always foldable. 2046if (S->
isZero())
returntrue;
2048// Conservatively, create an address with an immediate and a 2053// If there's anything else involved, it's not foldable. 2054if (!S->
isZero())
returnfalse;
2056// Fast-path: zero is always foldable. 2057if (BaseOffset.isZero() && !BaseGV)
2060if (BaseOffset.isScalable())
2063// Conservatively, create an address with an immediate and a 2065 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2068 BaseOffset, HasBaseReg, Scale);
2073/// An individual increment in a Chain of IV increments. Relate an IV user to 2074/// an expression that computes the IV it uses from the IV used by the previous 2075/// link in the Chain. 2077/// For the head of a chain, IncExpr holds the absolute SCEV expression for the 2078/// original IVOperand. The head of the chain's IVOperand is only valid during 2079/// chain collection, before LSR replaces IV users. During chain generation, 2080/// IncExpr can be used to find the new IVOperand that computes the same 2088 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
2091// The list of IV increments in program order. We typically add the head of a 2092// chain without finding subsequent links. 2095constSCEV *ExprBase =
nullptr;
2098 IVChain(
const IVInc &Head,
constSCEV *
Base)
2099 : Incs(1, Head), ExprBase(
Base) {}
2103// Return the first increment in the chain. 2106return std::next(Incs.
begin());
2112// Returns true if this chain contains any increments. 2113bool hasIncs()
const{
return Incs.
size() >= 2; }
2115// Add an IVInc to the end of this chain. 2118// Returns the last UserInst in the chain. 2121// Returns true if IncExpr can be profitably added to this chain. 2122bool isProfitableIncrement(
constSCEV *OperExpr,
2127/// Helper for CollectChains to track multiple IV increment uses. Distinguish 2128/// between FarUsers that definitely cross IV increments and NearUsers that may 2129/// be used between IV increments. 2135/// This class holds state for the main loop strength reduction logic. 2150 /// This is the insert position that the current loop's induction variable 2151 /// increment should be placed. In simple loops, this is the latch block's 2152 /// terminator. But in more complicated cases, this is a position which will 2153 /// dominate all the in-loop post-increment users. 2156 /// Interesting factors between use strides. 2158 /// We explicitly use a SetVector which contains a SmallSet, instead of the 2159 /// default, a SmallDenseSet, because we need to use the full range of 2160 /// int64_ts, and there's currently no good way of doing that with 2164 /// The cost of the current SCEV, the best solution by LSR will be dropped if 2165 /// the solution is not profitable. 2168 /// Interesting use types, to facilitate truncation reuse. 2171 /// The list of interesting uses. 2174 /// Track which uses use which register candidates. 2175 RegUseTracker RegUses;
2177// Limit the number of chains to avoid quadratic behavior. We don't expect to 2178// have more than a few IV increment chains in a loop. Missing a Chain falls 2179// back to normal LSR behavior for those uses. 2180staticconstunsigned MaxChains = 8;
2182 /// IV users can form a chain of IV increments. 2185 /// IV users that belong to profitable IVChains. 2188 /// Induction variables that were generated and inserted by the SCEV Expander. 2191// Inserting instructions in the loop and using them as PHI's input could 2192// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the 2193// corresponding incoming block is not loop exiting). So collect all such 2194// instructions to form LCSSA for them later. 2197void OptimizeShadowIV();
2200void OptimizeLoopTermCond();
2204void FinalizeChain(IVChain &Chain);
2205void CollectChains();
2206void GenerateIVChain(
const IVChain &Chain,
2209void CollectInterestingTypesAndFactors();
2210void CollectFixupsAndInitialFormulae();
2212// Support for sharing of LSRUses between LSRFixups. 2216bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2217 LSRUse::KindType Kind, MemAccessTy AccessTy);
2219 std::pair<size_t, Immediate> getUse(
constSCEV *&Expr, LSRUse::KindType Kind,
2220 MemAccessTy AccessTy);
2222void DeleteUse(LSRUse &LU,
size_t LUIdx);
2224 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2226void InsertInitialFormula(
constSCEV *S, LSRUse &LU,
size_t LUIdx);
2227void InsertSupplementalFormula(
constSCEV *S, LSRUse &LU,
size_t LUIdx);
2228void CountRegisters(
const Formula &
F,
size_t LUIdx);
2229bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2231void CollectLoopInvariantFixupsAndFormulae();
2233void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2236void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2238size_tIdx,
bool IsScaledReg =
false);
2239void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2240void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2242bool IsScaledReg =
false);
2243void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2244void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2247size_tIdx,
bool IsScaledReg =
false);
2248void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2250void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2251void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2252void GenerateCrossUseConstantOffsets();
2253void GenerateAllReuseFormulae();
2255void FilterOutUndesirableDedicatedRegisters();
2257size_t EstimateSearchSpaceComplexity()
const;
2258void NarrowSearchSpaceByDetectingSupersets();
2259void NarrowSearchSpaceByCollapsingUnrolledCode();
2260void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2261void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2262void NarrowSearchSpaceByFilterPostInc();
2263void NarrowSearchSpaceByDeletingCostlyFormulas();
2264void NarrowSearchSpaceByPickingWinnerRegs();
2265void NarrowSearchSpaceUsingHeuristics();
2280const LSRUse &LU)
const;
2282Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2285void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2288void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2297bool getChanged()
const{
return Changed; }
2299return ScalarEvolutionIVs;
2309}
// end anonymous namespace 2311/// If IV is used in a int-to-float cast inside the loop then try to eliminate 2312/// the cast operation. 2313void LSRInstance::OptimizeShadowIV() {
2315if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2319 UI != E;
/* empty */) {
2323Type *DestTy =
nullptr;
2324bool IsSigned =
false;
2326/* If shadow use is a int->float cast then insert a second IV 2327 to eliminate this cast. 2329 for (unsigned i = 0; i < n; ++i) 2335 for (unsigned i = 0; i < n; ++i, ++d) 2338if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2340 DestTy = UCast->getDestTy();
2342elseif (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2344 DestTy = SCast->getDestTy();
2346if (!DestTy)
continue;
2348// If target does not support DestTy natively then do not apply 2349// this transformation. 2356// If the calculation in integers overflows, the result in FP type will 2357// differ. So we only can do this transformation if we are guaranteed to not 2358// deal with overflowing values 2366if (Mantissa == -1)
continue;
2370unsignedEntry, Latch;
2381Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2382 (
double)
Init->getSExtValue() :
2383 (
double)
Init->getZExtValue());
2388if (Incr->
getOpcode() != Instruction::Add
2389 && Incr->
getOpcode() != Instruction::Sub)
2392/* Initialize new IV, double d = 0.0 in above example. */ 2403// Ignore negative constants, as the code below doesn't handle them 2404// correctly. TODO: Remove this restriction. 2405if (!
C->getValue().isStrictlyPositive())
2408/* Add new PHINode. */ 2412/* create new increment. '++d' in above example. */ 2413Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2415 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2416 : Instruction::FSub,
2423/* Remove cast operation */ 2431/// If Cond has an operand that is an expression of an IV, set the IV user and 2432/// stride information and return true, otherwise return false. 2435if (
U.getUser() ==
Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.

/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     ...
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       ...
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     ...
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  // Check for a max calculation that matches the pattern. There's no check
  // for ICMP_ULE here because the comparison would be with zero, which
  // isn't interesting.
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  const SCEVNAryExpr *Max = nullptr;
  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
    Max = S;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
    Max = S;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;
    Max = U;
  } else {
    // No match; bail.
    return Cond;
  }

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

  // ScalarEvolution canonicalizes constants to the left. For < and >, look
  // for a comparison with 1. For <= and >=, a comparison with zero.
  if (!MaxLHS ||
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
    return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
    // Look for n+1, and grab n.
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();
  else
    // Max doesn't match expected pattern.
    return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
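// Illustrative result (not from the original source): for the loop sketched in
// the comment above, the latch test
//   } while (++i != max);
// becomes
//   } while (++i < n);   // the new "scmp" comparison
// and the now-dead "max = n < 1 ? 1 : n" computation is erased.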
/// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
  // We need a different set of heuristics for rotated and non-rotated loops.
  // If a loop is rotated then the latch is also the backedge, so inserting
  // post-inc expressions just before the latch is ideal. To reduce live ranges
  // it also makes sense to rewrite terminating conditions to use post-inc
  // expressions.
  //
  // If the loop is not rotated then the latch is not a backedge; the latch
  // check is done in the loop head. Adding post-inc expressions before the
  // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
  // in the loop body. In this case we do *not* want to use post-inc expressions
  // in the latch check, and we want to insert post-inc expressions before
  // the backedge.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
2624// The backedge doesn't exit the loop; treat this as a head-tested loop. 2629// Otherwise treat this as a rotated loop. 2630for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2631// Get the terminating condition for the loop if possible. If we 2632// can, we want to change it to use a post-incremented version of its 2633// induction variable, to allow coalescing the live ranges for the IV into 2634// one register value. 2636BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2639// FIXME: Overly conservative, termination condition could be an 'or' etc.. 2643// Search IVUsesByStride to find Cond's IVUse if there is one. 2646if (!FindIVUserForCond(
Cond, CondUse))
2649// If the trip count is computed in terms of a max (due to ScalarEvolution 2650// being unable to find a sufficient guard, for example), change the loop 2651// comparison to use SLT or ULT instead of NE. 2652// One consequence of doing this now is that it disrupts the count-down 2653// optimization. That's not always a bad thing though, because in such 2654// cases it may still be worthwhile to avoid a max. 2657// If this exiting block dominates the latch block, it may also use 2658// the post-inc value if it won't be shared with other uses. 2659// Check for dominance. 2660if (!DT.
dominates(ExitingBlock, LatchBlock))
2663// Conservatively avoid trying to use the post-inc value in non-latch 2664// exits if there may be pre-inc users in intervening blocks. 2665if (LatchBlock != ExitingBlock)
2667// Test if the use is reachable from the exiting block. This dominator 2668// query is a conservative approximation of reachability. 2669if (&UI != CondUse &&
2671// Conservatively assume there may be reuse if the quotient of their 2672// strides could be a legal scale. 2673constSCEV *
A = IU.getStride(*CondUse, L);
2674constSCEV *
B = IU.getStride(UI, L);
2675if (!
A || !
B)
continue;
2687// Stride of one or negative one can have reuse with non-addresses. 2688if (
C->isOne() ||
C->isMinusOne())
2689goto decline_post_inc;
2690// Avoid weird situations. 2691if (
C->getValue().getSignificantBits() >= 64 ||
2692C->getValue().isMinSignedValue())
2693goto decline_post_inc;
2694// Check for possible scaled-address reuse. 2696 MemAccessTy AccessTy =
2698 int64_t Scale =
C->getSExtValue();
2701/*HasBaseReg=*/true, Scale,
2702 AccessTy.AddrSpace))
2703goto decline_post_inc;
2707/*HasBaseReg=*/true, Scale,
2708 AccessTy.AddrSpace))
2709goto decline_post_inc;
2714LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: " 2717// It's possible for the setcc instruction to be anywhere in the loop, and 2718// possible for it to have multiple users. If it is not immediately before 2719// the exiting block branch, move it. 2720if (
Cond->getNextNonDebugInstruction() != TermBr) {
2721if (
Cond->hasOneUse()) {
2724// Clone the terminating condition and insert into the loopend. 2726Cond = cast<ICmpInst>(
Cond->clone());
2727Cond->setName(
L->getHeader()->getName() +
".termcond");
2730// Clone the IVUse, as the old use still exists! 2736// If we get to here, we know that we can transform the setcc instruction to 2737// use the post-incremented version of the IV, allowing us to coalesce the 2738// live ranges for the IV correctly. 2746// Determine an insertion point for the loop induction variable increment. It 2747// must dominate all the post-inc comparisons we just set up, and it must 2748// dominate the loop latch edge. 2749 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
/// Determine if the given use can accommodate a fixup at the given offset and
/// other details. If so, update the use and return true.
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
  // something conservative, however this can pessimize in the case that one of
  // the uses will have all its uses outside the loop, for example.
  if (LU.Kind != Kind)
    return false;

  // Check for a mismatched access type, and fall back conservatively as needed.
  // TODO: Be less conservative when the type is similar and can use the same
  // addressing modes.
  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);
    }
  }

  // Conservatively assume HasBaseReg is true for now.
  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          LU.MaxOffset - NewOffset, HasBaseReg))
      return false;
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          NewOffset - LU.MinOffset, HasBaseReg))
      return false;
    NewMaxOffset = NewOffset;
  }

  // FIXME: We should be able to handle some level of scalable offset support
  // for 'void', but in order to get basic support up and running this is
  // being left out for now.
  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
    return false;

  // Update the use.
  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
  return true;
}
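// Illustrative example (assumed target behaviour): if an LSRUse currently
// covers address fixups at offsets [0, 8] and a new fixup needs offset 16, the
// use is only widened to cover [0, 16] when the target can still fold the
// whole range into its addressing modes; otherwise reconcileNewOffset returns
// false and the caller creates a separate LSRUse.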
/// Return an LSRUse index and an offset value for a fixup which needs the given
/// expression, with the given kind and optional access type. Either reuse an
/// existing use or create a new one, as needed.
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;
  Immediate Offset = ExtractImmediate(Expr, SE);

  // Basic uses can't accept any offset, for example.
  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
                        Offset, /*HasBaseReg=*/ true)) {
    Expr = Copy;
    Offset = Immediate::getFixed(0);
  }

  std::pair<UseMapTy::iterator, bool> P =
    UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
  if (!P.second) {
    // A use already existed with this base.
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
      // Reuse this use.
      return std::make_pair(LUIdx, Offset);
  }

  // Create a new use.
  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  LU.MinOffset = Offset;
  LU.MaxOffset = Offset;
  return std::make_pair(LUIdx, Offset);
}
/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
    std::swap(LU, Uses.back());
  Uses.pop_back();

  // Update RegUses.
  RegUses.swapAndDropUse(LUIdx, Uses.size());
}
/// Look for a use distinct from OrigLU which has a formula that has the same
/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  // Search all uses for the formula. This could be more clever.
  for (LSRUse &LU : Uses) {
    // Check whether this use is close enough to OrigLU, to see whether it's
    // worthwhile looking through its formulae.
    // Ignore ICmpZero uses because they may contain formulae generated by
    // GenerateICmpZeroScales, in which case adding fixup offsets may
    // be invalid.
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      // Scan through this use's formulae.
      for (const Formula &F : LU.Formulae) {
        // Check to see if this formula has the same registers and symbols
        // as OrigF.
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
            return &LU;
          // This is the formula where all the registers and symbols matched;
          // there aren't going to be any others. Since we declined it, we
          // can skip the rest of the formulae and proceed to the next LSRUse.
          break;
        }
      }
    }
  }

  // Nothing looked good.
  return nullptr;
}

void LSRInstance::CollectInterestingTypesAndFactors() {
2898// Collect interesting types and strides. 2901constSCEV *Expr = IU.getExpr(U);
2905// Collect interesting types. 2908// Add strides for mentioned loops. 2919 }
while (!Worklist.
empty());
2922// Compute interesting factors from the set of interesting strides. 2924I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2926 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2927constSCEV *OldStride = *
I;
2928constSCEV *NewStride = *NewStrideIter;
2939 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2941if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2942 Factors.insert(Factor->getAPInt().getSExtValue());
2947if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2948 Factors.insert(Factor->getAPInt().getSExtValue());
2952// If all uses use the same type, don't bother looking for truncation-based 2954if (
Types.size() == 1)
/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
/// IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
              Loop *L, ScalarEvolution &SE) {
  for(; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
      if (!SE.isSCEVable(Oper->getType()))
        continue;

      if (const SCEVAddRecExpr *AR =
            dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
        if (AR->getLoop() == L)
          break;
      }
    }
  }
  return OI;
}

/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);
  return Oper;
}
/// Return an approximation of this SCEV expression's "base", or NULL for any
/// constant. Returning the expression itself is conservative. Returning a
/// deeper subexpression is more precise and valid as long as it isn't less
/// complex than another subexpression. For expressions involving multiple
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) {
  switch (S->getSCEVType()) {
  default: // including scUnknown.
    return S;
  case scConstant:
    return nullptr;
  case scTruncate:
    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
  case scZeroExtend:
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
  case scSignExtend:
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
  case scAddExpr: {
    // Skip over scaled operands (scMulExpr) to follow add operands as long as
    // there's nothing more complex.
    // FIXME: not sure if we want to recognize negation.
    const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
    for (const SCEV *SubExpr : reverse(Add->operands())) {
      if (SubExpr->getSCEVType() == scAddExpr)
        return getExprBase(SubExpr);

      if (SubExpr->getSCEVType() != scMulExpr)
        return SubExpr;
    }
    return S; // all operands are scaled, be conservative.
  }
  case scAddRecExpr:
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
  }
}
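// For example (illustrative): for an address expression such as
// ((4 * {0,+,1}<%L>) + %a), getExprBase skips the scaled operand and returns
// %a, so accesses like a[i] and b[i] get distinct bases and chains are not
// formed across different objects.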
/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as a chain increment as long as it's not obviously
/// expensive to expand using real instructions.
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {
  // Aggressively form chains when -stress-ivchain.
  if (StressIVChain)
    return true;

  // Do not replace a constant offset from IV head with a nonconstant IV
  // increment.
  if (!isa<SCEVConstant>(IncExpr)) {
    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
      return false;
  }

  SmallPtrSet<const SCEV*, 8> Processed;
  return !isHighCostExpansion(IncExpr, Processed, SE);
}
/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in another chain.
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction*> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  if (StressIVChain)
    return true;

  if (!Chain.hasIncs())
    return false;
3076if (!
Users.empty()) {
3077LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3079 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3082assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
  // The chain itself may require a register, so initialize cost to 1.
  int cost = 1;

  // A complete chain likely eliminates the need for keeping the original IV in
  // a register. LSR does not currently know how to form a complete chain unless
  // the header phi already exists.
  if (isa<PHINode>(Chain.tailUserInst())
3091 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3094constSCEV *LastIncExpr =
nullptr;
3095unsigned NumConstIncrements = 0;
3096unsigned NumVarIncrements = 0;
3097unsigned NumReusedIncrements = 0;
3102for (
const IVInc &Inc : Chain) {
3105if (Inc.IncExpr->isZero())
3108// Incrementing by zero or some constant is neutral. We assume constants can 3109// be folded into an addressing mode or an add's immediate operand. 3110if (isa<SCEVConstant>(Inc.IncExpr)) {
3111 ++NumConstIncrements;
3115if (Inc.IncExpr == LastIncExpr)
3116 ++NumReusedIncrements;
3120 LastIncExpr = Inc.IncExpr;
3122// An IV chain with a single increment is handled by LSR's postinc 3123// uses. However, a chain with multiple increments requires keeping the IV's 3124// value live longer than it needs to be if chained. 3125if (NumConstIncrements > 1)
3128// Materializing increment expressions in the preheader that didn't exist in 3129// the original code may cost a register. For example, sign-extended array 3130// indices can produce ridiculous increments like this: 3131// IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) 3132 cost += NumVarIncrements;
3134// Reusing variable increments likely saves a register to hold the multiple of 3136 cost -= NumReusedIncrements;
3138LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3144/// Add this IV user to an existing chain or make it the head of a new chain. 3147// When IVs are used as types of varying widths, they are generally converted 3148// to a wider type with some uses remaining narrow under a (free) trunc. 3153// Visit all existing chains. Check if its IVOper can be computed as a 3154// profitable loop invariant increment from the last link in the Chain. 3155unsigned ChainIdx = 0, NChains = IVChainVec.size();
3156constSCEV *LastIncExpr =
nullptr;
3157for (; ChainIdx < NChains; ++ChainIdx) {
3158 IVChain &Chain = IVChainVec[ChainIdx];
3160// Prune the solution space aggressively by checking that both IV operands 3161// are expressions that operate on the same unscaled SCEVUnknown. This 3162// "base" will be canceled by the subsequent getMinusSCEV call. Checking 3163// first avoids creating extra SCEV expressions. 3171// A phi node terminates a chain. 3172if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3175// The increment must be loop-invariant so it can be kept in a register. 3178if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
3181if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3182 LastIncExpr = IncExpr;
3186// If we haven't found a chain, create a new one, unless we hit the max. Don't 3187// bother for phi nodes, because they must be last in the chain. 3188if (ChainIdx == NChains) {
3189if (isa<PHINode>(UserInst))
3195 LastIncExpr = OperExpr;
3196// IVUsers may have skipped over sign/zero extensions. We don't currently 3197// attempt to form chains involving extensions unless they can be hoisted 3198// into this loop's AddRec. 3199if (!isa<SCEVAddRecExpr>(LastIncExpr))
3202 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3204 ChainUsersVec.
resize(NChains);
3205LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3206 <<
") IV=" << *LastIncExpr <<
"\n");
3208LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3209 <<
") IV+" << *LastIncExpr <<
"\n");
3210// Add this IV user to the end of the chain. 3211 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3213 IVChain &Chain = IVChainVec[ChainIdx];
3216// This chain's NearUsers become FarUsers. 3217if (!LastIncExpr->
isZero()) {
3218 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
  // All other uses of IVOperand become near uses of the chain.
  // We currently ignore intermediate values within SCEV expressions, assuming
  // they will eventually be used by the current chain, or can be computed
  // from one of the chain increments. To be more precise we could
  // transitively follow its user and only add leaf IV users to the set.
  //
  // Uses in the chain will no longer be uses if the chain is formed.
  // Include the head of the chain in this iteration (not Chain.begin()).
  IVChain::const_iterator IncIter = Chain.Incs.begin();
3235 IVChain::const_iterator IncEnd = Chain.Incs.end();
3236for( ; IncIter != IncEnd; ++IncIter) {
3237if (IncIter->UserInst == OtherUse)
3240if (IncIter != IncEnd)
3244 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3245 && IU.isIVUserOrOperand(OtherUse)) {
3248 NearUsers.
insert(OtherUse);
3251// Since this user is part of the chain, it's no longer considered a use 3253 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3256/// Populate the vector of Chains. 3258/// This decreases ILP at the architecture level. Targets with ample registers, 3259/// multiple memory ports, and no register renaming probably don't want 3260/// this. However, such targets should probably disable LSR altogether. 3262/// The job of LSR is to make a reasonable choice of induction variables across 3263/// the loop. Subsequent passes can easily "unchain" computation exposing more 3264/// ILP *within the loop* if the target wants it. 3266/// Finding the best IV chain is potentially a scheduling problem. Since LSR 3267/// will not reorder memory operations, it will recognize this as a chain, but 3268/// will generate redundant IV increments. Ideally this would be corrected later 3269/// by a smart scheduler: 3275/// TODO: Walk the entire domtree within this loop, not just the path to the 3276/// loop latch. This will discover chains on side paths, but requires 3277/// maintaining multiple copies of the Chains state. 3278void LSRInstance::CollectChains() {
3285 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3290// Walk the instruction stream from the loop header to the loop latch. 3293// Skip instructions that weren't seen by IVUsers analysis. 3294if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3297// Ignore users that are part of a SCEV expression. This way we only 3298// consider leaf IV Users. This effectively rediscovers a portion of 3299// IVUsers analysis but in program order this time. 3303// Remove this instruction from any NearUsers set it may be in. 3304for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3305 ChainIdx < NChains; ++ChainIdx) {
3306 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3308// Search for operands that can be chained. 3312while (IVOpIter != IVOpEnd) {
3313Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3314if (UniqueOperands.
insert(IVOpInst).second)
3315 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3316 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3318 }
// Continue walking down the instructions. 3319 }
// Continue walking down the domtree. 3320// Visit phi backedges to determine if the chain can generate the IV postinc. 3321for (
PHINode &PN :
L->getHeader()->phis()) {
3326 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3328 ChainInstruction(&PN, IncV, ChainUsersVec);
3330// Remove any unprofitable chains. 3331unsigned ChainIdx = 0;
3332for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3333 UsersIdx < NChains; ++UsersIdx) {
3335 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3337// Preserve the chain at UsesIdx. 3338if (ChainIdx != UsersIdx)
3339 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3340 FinalizeChain(IVChainVec[ChainIdx]);
3343 IVChainVec.resize(ChainIdx);
3346void LSRInstance::FinalizeChain(IVChain &Chain) {
3347assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3348LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3350for (
const IVInc &Inc : Chain) {
3352auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3353assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3354 IVIncSet.insert(UseI);
3358/// Return true if the IVInc can be folded into an addressing mode. 3361constSCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3362 Immediate IncOffset = Immediate::getZero();
3368// Look for mul(vscale, constant), to detect a scalable offset. 3369auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
3370if (!IncVScale || IncVScale->getNumOperands() != 2 ||
3371 !isa<SCEVVScale>(IncVScale->getOperand(1)))
3373auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
3374if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
3376 IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
3384 IncOffset,
/*HasBaseReg=*/false))
3390/// Generate an add or subtract for each IVInc in a chain to materialize the IV 3391/// user's operand from the previous IV user's operand. 3392void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3394// Find the new IVOperand for the head of the chain. It may have been replaced 3396const IVInc &Head = Chain.Incs[0];
3398// findIVOperand returns IVOpEnd if it can no longer find a valid IV user. 3401Value *IVSrc =
nullptr;
3402while (IVOpIter != IVOpEnd) {
3405// If this operand computes the expression that the chain needs, we may use 3406// it. (Check this after setting IVSrc which is used below.) 3408// Note that if Head.IncExpr is wider than IVSrc, then this phi is too 3409// narrow for the chain, so we can no longer use it. We do allow using a 3410// wider phi, assuming the LSR checked for free truncation. In that case we 3411// should already have a truncate on this operand such that 3412// getSCEV(IVSrc) == IncExpr. 3413if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3414 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3417 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3419if (IVOpIter == IVOpEnd) {
3420// Gracefully give up on this chain. 3421LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3424assert(IVSrc &&
"Failed to find IV chain source");
3429constSCEV *LeftOverExpr =
nullptr;
3434for (
const IVInc &Inc : Chain) {
3436if (isa<PHINode>(InsertPt))
3437 InsertPt =
L->getLoopLatch()->getTerminator();
3439// IVOper will replace the current IV User's operand. IVSrc is the IV 3440// value currently held in a register. 3441Value *IVOper = IVSrc;
3442if (!Inc.IncExpr->isZero()) {
3443// IncExpr was the result of subtraction of two narrow values, so must 3447 LeftOverExpr = LeftOverExpr ?
3448 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3451// Look through each base to see if any can produce a nice addressing mode. 3452bool FoundBase =
false;
3453for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3456if (!Remainder->
isZero()) {
3458Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3459constSCEV *IVOperExpr =
3461 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3470if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3471// Expand the IV increment. 3473Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3476 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3478// If an IV increment can't be folded, use it as the next IV value. 3480assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3483 LeftOverExpr =
nullptr;
3486Type *OperTy = Inc.IVOperand->getType();
3487if (IVTy != OperTy) {
3489"cannot extend a chained IV");
3491 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3493 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3494if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3497// If LSR created a new, wider phi, we may also replace its postinc. We only 3498// do this if we also found a wide value for the head of the chain. 3499if (isa<PHINode>(Chain.tailUserInst())) {
3500for (
PHINode &Phi :
L->getHeader()->phis()) {
3504Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3507Value *IVOper = IVSrc;
3509if (IVTy != PostIncTy) {
3511IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3512 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3513 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3515Phi.replaceUsesOfWith(PostIncV, IVOper);
3521void LSRInstance::CollectFixupsAndInitialFormulae() {
3523bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3525// For calculating baseline cost 3532// Skip IV users that are part of profitable IV Chains. 3535assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3536if (IVIncSet.count(UseI)) {
3537LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3541 LSRUse::KindType
Kind = LSRUse::Basic;
3542 MemAccessTy AccessTy;
3544Kind = LSRUse::Address;
3548constSCEV *S = IU.getExpr(U);
3553// Equality (== and !=) ICmps are special. We can rewrite (i == N) as 3554// (N - i == 0), and this allows (N - i) to be the expression that we work 3555// with rather than just N or i, so we can consider the register 3556// requirements for both N and i at the same time. Limiting this code to 3557// equality icmps is not a problem because all interesting loops use 3558// equality icmps, thanks to IndVarSimplify. 3559if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3560// If CI can be saved in some target, like replaced inside hardware loop 3561// in PowerPC, no need to generate initial formulae for it. 3562if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3564if (CI->isEquality()) {
3565// Swap the operands if needed to put the OperandValToReplace on the 3566// left, for consistency. 3568if (NV ==
U.getOperandValToReplace()) {
3569 CI->setOperand(1, CI->getOperand(0));
3570 CI->setOperand(0, NV);
3571NV = CI->getOperand(1);
3575// x == y --> x - y == 0 3578 (!
NV->getType()->isPointerTy() ||
3580// S is normalized, so normalize N before folding it into S 3581// to keep the result normalized. 3585Kind = LSRUse::ICmpZero;
3587 }
elseif (
L->isLoopInvariant(NV) &&
3588 (!isa<Instruction>(NV) ||
3589 DT.
dominates(cast<Instruction>(NV),
L->getHeader())) &&
3590 !
NV->getType()->isPointerTy()) {
3591// If we can't generally expand the expression (e.g. it contains 3592// a divide), but it is already at a loop invariant point before the 3593// loop, wrap it in an unknown (to prevent the expander from trying 3594// to re-expand in a potentially unsafe way.) The restriction to 3595// integer types is required because the unknown hides the base, and 3596// SCEV can't compute the difference of two unknown pointers. 3601Kind = LSRUse::ICmpZero;
3603assert(!isa<SCEVCouldNotCompute>(S));
3606// -1 and the negations of all interesting strides (except the negation 3607// of -1) are now also interesting. 3608for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3609if (Factors[i] != -1)
3610 Factors.insert(-(
uint64_t)Factors[i]);
3615// Get or create an LSRUse. 3616 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3617size_t LUIdx =
P.first;
3619 LSRUse &LU =
Uses[LUIdx];
3622 LSRFixup &LF = LU.getNewFixup();
3623 LF.UserInst = UserInst;
3624 LF.OperandValToReplace =
U.getOperandValToReplace();
3625 LF.PostIncLoops = TmpPostIncLoops;
3627 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3629// Create SCEV as Formula for calculating baseline cost 3630if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3632F.initialMatch(S, L, SE);
3633 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3634 VisitedLSRUse.
insert(LUIdx);
3637if (!LU.WidestFixupType ||
3640 LU.WidestFixupType = LF.OperandValToReplace->getType();
3642// If this is the first use of this LSRUse, give it a formula. 3643if (LU.Formulae.empty()) {
3644 InsertInitialFormula(S, LU, LUIdx);
3645 CountRegisters(LU.Formulae.back(), LUIdx);
3652/// Insert a formula for the given expression into the given use, separating out 3653/// loop-variant portions from loop-invariant and loop-computable portions. 3654void LSRInstance::InsertInitialFormula(
constSCEV *S, LSRUse &LU,
3656// Mark uses whose expressions cannot be expanded. 3658 LU.RigidFormula =
true;
3661F.initialMatch(S, L, SE);
3662boolInserted = InsertFormula(LU, LUIdx,
F);
3663assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3666/// Insert a simple single-register formula for the given expression into the 3669LSRInstance::InsertSupplementalFormula(
constSCEV *S,
3670 LSRUse &LU,
size_t LUIdx) {
3672F.BaseRegs.push_back(S);
3674boolInserted = InsertFormula(LU, LUIdx,
F);
3675assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3678/// Note which registers are used by the given formula, updating RegUses. 3679void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3681 RegUses.countRegister(
F.ScaledReg, LUIdx);
3682for (
constSCEV *BaseReg :
F.BaseRegs)
3683 RegUses.countRegister(BaseReg, LUIdx);
3686/// If the given formula has not yet been inserted, add it to the list, and 3687/// return true. Return false otherwise. 3688bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3689// Do not insert formula that we will not be able to expand. 3691"Formula is illegal");
3693if (!LU.InsertFormula(
F, *L))
3696 CountRegisters(
F, LUIdx);
3700/// Check for other uses of loop-invariant values which we're tracking. These 3701/// other uses will pin these values in registers, making them less profitable 3703/// TODO: This currently misses non-constant addrec step registers. 3704/// TODO: Should this give more weight to users inside the loop? 3706LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3710// Don't collect outside uses if we are favoring postinc - the instructions in 3711// the loop are more important than the ones outside of it. 3715while (!Worklist.
empty()) {
3718// Don't process the same SCEV twice 3719if (!Visited.
insert(S).second)
3729 }
elseif (
constSCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3730constValue *
V = US->getValue();
3731if (
constInstruction *Inst = dyn_cast<Instruction>(V)) {
3732// Look for instructions defined outside the loop. 3733if (
L->contains(Inst))
continue;
3734 }
elseif (isa<Constant>(V))
3735// Constants can be re-materialized. 3737for (
constUse &U :
V->uses()) {
3738constInstruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3739// Ignore non-instructions. 3742// Don't bother if the instruction is an EHPad. 3745// Ignore instructions in other functions (as can happen with 3747if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3749// Ignore instructions not dominated by the loop. 3750constBasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3752 cast<PHINode>(UserInst)->getIncomingBlock(
3756// Don't bother if the instruction is in a BB which ends in an EHPad. 3760// Ignore cases in which the currently-examined value could come from 3761// a basic block terminated with an EHPad. This checks all incoming 3762// blocks of the phi node since it is possible that the same incoming 3763// value comes from multiple basic blocks, only some of which may end 3764// in an EHPad. If any of them do, a subsequent rewrite attempt by this 3765// pass would try to insert instructions into an EHPad, hitting an 3767if (isa<PHINode>(UserInst)) {
3768constauto *PhiNode = cast<PHINode>(UserInst);
3769bool HasIncompatibleEHPTerminatedBlock =
false;
3771for (
unsignedintI = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3772if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3773if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3774 HasIncompatibleEHPTerminatedBlock =
true;
3779if (HasIncompatibleEHPTerminatedBlock) {
3784// Don't bother rewriting PHIs in catchswitch blocks. 3785if (isa<CatchSwitchInst>(UserInst->
getParent()->getTerminator()))
3787// Ignore uses which are part of other SCEV expressions, to avoid 3788// analyzing them multiple times. 3791// If the user is a no-op, look through to its uses. 3792if (!isa<SCEVUnknown>(UserS))
3800// Ignore icmp instructions which are already being analyzed. 3801if (
constICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3802unsigned OtherIdx = !
U.getOperandNo();
3803Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3808 std::pair<size_t, Immediate>
P =
3809 getUse(S, LSRUse::Basic, MemAccessTy());
3810size_t LUIdx =
P.first;
3812 LSRUse &LU =
Uses[LUIdx];
3813 LSRFixup &LF = LU.getNewFixup();
3815 LF.OperandValToReplace =
U;
3817 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3818if (!LU.WidestFixupType ||
3821 LU.WidestFixupType = LF.OperandValToReplace->getType();
3822 InsertSupplementalFormula(US, LU, LUIdx);
3823 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3830/// Split S into subexpressions which can be pulled out into separate 3831/// registers. If C is non-null, multiply each subexpression by C. 3833/// Return remainder expression after factoring the subexpressions captured by 3834/// Ops. If Ops is complete, return NULL. 3840// Arbitrarily cap recursion to protect compile time. 3845// Break out add operands. 3846for (
constSCEV *S :
Add->operands()) {
3852 }
elseif (
constSCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3853// Split a non-zero base out of an addrec. 3859// Split the non-zero AddRec unless it is part of a nested recurrence that 3860// does not pertain to this loop. 3861if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3871//FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 3875// Break (C * (a + b + c)) into C*a + C*b + C*c. 3881constSCEV *Remainder =
3891/// Return true if the SCEV represents a value that may end up as a 3892/// post-increment operation. 3894 LSRUse &LU,
constSCEV *S,
constLoop *L,
3896if (LU.Kind != LSRUse::Address ||
3897 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3903if (!isa<SCEVConstant>(LoopStep))
3905// Check if a post-indexed load/store can be used. 3915/// Helper function for LSRInstance::GenerateReassociations. 3916void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3921// Don't generate reassociations for the base register of a value that 3922// may generate a post-increment operator. The reason is that the 3923// reassociations cause extra base+register formula to be created, 3924// and possibly chosen, but the post-increment is more efficient. 3932if (AddOps.
size() == 1)
3938// Loop-variant "unknown" values are uninteresting; we won't be able to 3939// do anything meaningful with them. 3943// Don't pull a constant into a register if the constant could be folded 3944// into an immediate field. 3946 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3949// Collect all operands except *J. 3952 InnerAddOps.append(std::next(J),
3955// Don't leave just a constant behind in a register if the constant could 3956// be folded into an immediate field. 3957if (InnerAddOps.size() == 1 &&
3959 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3967if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3970// Add the remaining pieces of the add back into the new formula. 3971constSCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3976 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3979F.ScaledReg =
nullptr;
3982F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3983 }
elseif (IsScaledReg)
3984F.ScaledReg = InnerSum;
3986F.BaseRegs[
Idx] = InnerSum;
3988// Add J as its own register, or an unfolded immediate. 3992SC->getValue()->getZExtValue()))
3994 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3995SC->getValue()->getZExtValue());
3997F.BaseRegs.push_back(*J);
3998// We may have changed the number of register in base regs, adjust the 3999// formula accordingly. 4002if (InsertFormula(LU, LUIdx,
F))
4003// If that formula hadn't been seen before, recurse to find more like 4005// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2) 4006// Because just Depth is not enough to bound compile time. 4007// This means that every time AddOps.size() is greater 16^x we will add 4009 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4014/// Split out subexpressions from adds and the bases of addrecs. 4015void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4017assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4018// Arbitrarily cap recursion to protect compile time. 4022for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4023 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4026 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4027/* Idx */ -1,
/* IsScaledReg */true);
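// Illustrative example: a formula whose base register is the SCEV
// (%a + %b + 4) may be reassociated into a formula with base registers
// %a and (%b + 4), or with the constant 4 pulled into the unfolded offset,
// letting later filtering pick whichever form is cheapest for the target.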
4030/// Generate a formula consisting of all of the loop-dominating registers added 4031/// into a single register. 4032void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4034// This method is only interesting on a plurality of registers. 4035if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4036 (
Base.UnfoldedOffset.isNonZero()) <=
4040// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before 4041// processing the formula. 4044 Formula NewBase =
Base;
4045 NewBase.BaseRegs.clear();
4046Type *CombinedIntegerType =
nullptr;
4047for (
constSCEV *BaseReg :
Base.BaseRegs) {
4050if (!CombinedIntegerType)
4055 NewBase.BaseRegs.push_back(BaseReg);
4058// If no register is relevant, we're done. 4062// Utility function for generating the required variants of the combined 4064auto GenerateFormula = [&](
constSCEV *Sum) {
4067// TODO: If Sum is zero, it probably means ScalarEvolution missed an 4068// opportunity to fold something. For now, just ignore such cases 4069// rather than proceed with zero in a register. 4073F.BaseRegs.push_back(Sum);
4075 (void)InsertFormula(LU, LUIdx,
F);
4078// If we collected at least two registers, generate a formula combining them. 4079if (Ops.
size() > 1) {
4084// If we have an unfolded offset, generate a formula combining it with the 4085// registers collected. 4086if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4087assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4089 NewBase.UnfoldedOffset.getFixedValue(),
true));
4090 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4095/// Helper function for LSRInstance::GenerateSymbolicOffsets. 4096void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4101if (
G->isZero() || !GV)
4105if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4111 (void)InsertFormula(LU, LUIdx,
F);
4114/// Generate reuse formulae using symbolic offsets. 4115void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4117// We can't add a symbolic offset if the address already contains one. 4118if (
Base.BaseGV)
return;
4120for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4121 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4123 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base,
/* Idx */ -1,
4124/* IsScaledReg */true);
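// Illustrative example: a base register computing (@global + %x) can instead
// be represented with BaseGV = @global and a base register of %x, exposing
// addressing modes on targets that can fold the symbol directly.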
4127/// Helper function for LSRInstance::GenerateConstantOffsets. 4128void LSRInstance::GenerateConstantOffsetsImpl(
4129 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4132auto GenerateOffset = [&](
constSCEV *
G, Immediate
Offset) {
4134if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4138if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4139// Add the offset to the base register. 4140constSCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4142// If it cancelled out, drop the base register, otherwise update it. 4146F.ScaledReg =
nullptr;
4148F.deleteBaseReg(
F.BaseRegs[
Idx]);
4150 }
elseif (IsScaledReg)
4153F.BaseRegs[
Idx] = NewG;
4155 (void)InsertFormula(LU, LUIdx,
F);
  // With constant offsets and constant steps, we can generate pre-inc
  // accesses by having the offset equal the step. So, for access #0 with a
  // step of 8, we generate a G - 8 base which would require the first access
  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
  // for itself and hopefully becomes the base for other accesses. This means
  // that a single pre-indexed access can be generated to become the new base
  // pointer for each iteration of the loop, resulting in no extra add/sub
  // instructions for pointer updating.
  if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
4172 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
4173constAPInt &StepInt = StepRec->getAPInt();
4177for (Immediate
Offset : Worklist) {
4179Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4186for (Immediate
Offset : Worklist)
4190if (
G->isZero() ||
Imm.isZero() ||
4191 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4194F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4195if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4201// We may generate non canonical Formula if G is a recurrent expr reg 4202// related with current loop while F.ScaledReg is not. 4205 (void)InsertFormula(LU, LUIdx,
F);
4208/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. 4209void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
  // TODO: For now, just add the min and max offset, because it usually isn't
  // worthwhile looking at everything in between.
  if (LU.MaxOffset != LU.MinOffset)
4218for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4219 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4221 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist,
/* Idx */ -1,
4222/* IsScaledReg */true);
4225/// For ICmpZero, check to see if we can scale up the comparison. For example, x 4226/// == y -> x*c == y*c. 4227void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4229if (LU.Kind != LSRUse::ICmpZero)
return;
4231// Determine the integer type for the base formula. 4236// Don't do this if there is more than one offset. 4237if (LU.MinOffset != LU.MaxOffset)
return;
4239// Check if transformation is valid. It is illegal to multiply pointer. 4240if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4242for (
constSCEV *BaseReg :
Base.BaseRegs)
4245assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4247// Check each interesting stride. 4248for (int64_t Factor : Factors) {
4249// Check that Factor can be represented by IntTy 4252// Check that the multiplication doesn't overflow. 4253if (
Base.BaseOffset.isMin() && Factor == -1)
4255// Not supporting scalable immediates. 4256if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4258 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4259assert(Factor != 0 &&
"Zero factor not expected!");
4260if (NewBaseOffset.getFixedValue() / Factor !=
4261Base.BaseOffset.getFixedValue())
4263// If the offset will be truncated at this use, check that it is in bounds. 4268// Check that multiplying with the use offset doesn't overflow. 4269 Immediate
Offset = LU.MinOffset;
4270if (
Offset.isMin() && Factor == -1)
4273if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4275// If the offset will be truncated at this use, check that it is in bounds. 4281F.BaseOffset = NewBaseOffset;
4283// Check that this scale is legal. 4287// Compensate for the use having MinOffset built into it. 4288F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4292// Check that multiplying with each base register doesn't overflow. 4293for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4299// Check that multiplying with the scaled register doesn't overflow. 4306// Check that multiplying with the unfolded offset doesn't overflow. 4307if (
F.UnfoldedOffset.isNonZero()) {
4308if (
F.UnfoldedOffset.isMin() && Factor == -1)
4310F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4311if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4312Base.UnfoldedOffset.getFixedValue())
4314// If the offset will be truncated, check that it is in bounds. 4316 IntTy,
F.UnfoldedOffset.getFixedValue()))
4320// If we make it here and it's legal, add it. 4321 (void)InsertFormula(LU, LUIdx,
F);
4326/// Generate stride factor reuse formulae by making use of scaled-offset address 4327/// modes, for example. 4328void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4329// Determine the integer type for the base formula. 4333// If this Formula already has a scaled register, we can't add another one. 4334// Try to unscale the formula to generate a better scale. 4335if (
Base.Scale != 0 && !
Base.unscale())
4338assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4340// Check each interesting stride. 4341for (int64_t Factor : Factors) {
4343Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4344// Check whether this scale is going to be legal. 4345if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4347// As a special-case, handle special out-of-loop Basic users specially. 4348// TODO: Reconsider this special case. 4349if (LU.Kind == LSRUse::Basic &&
4351 LU.AccessTy,
Base) &&
4352 LU.AllFixupsOutsideLoop)
4353 LU.Kind = LSRUse::Special;
4357// For an ICmpZero, negating a solitary base register won't lead to 4359if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4360Base.BaseOffset.isZero() && !
Base.BaseGV)
4362// For each addrec base reg, if its loop is current loop, apply the scale. 4363for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4365if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4369// Divide out the factor, ignoring high bits, since we'll be 4370// scaling the value back up in the end. 4372if (!Quotient->isZero()) {
4373// TODO: This could be optimized to avoid all the copying. 4375F.ScaledReg = Quotient;
4376F.deleteBaseReg(
F.BaseRegs[i]);
4377// The canonical representation of 1*reg is reg, which is already in 4378// Base. In that case, do not try to insert the formula, it will be 4380if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4381 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4383// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate 4384// non canonical Formula with ScaledReg's loop not being L. 4385if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4387 (void)InsertFormula(LU, LUIdx,
F);
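// Illustrative example: with an interesting factor of 4, a formula that uses
// reg({0,+,4}<%L>) can be rewritten here as 4 * reg({0,+,1}<%L>); the scaled
// form is only kept when the target reports that a scale of 4 is legal for
// this kind of use.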
4394/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops. 4395/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then 4396/// perform the extension/truncate and normalize again, as the normalized form 4397/// can result in folds that are not valid in the post-inc use contexts. The 4398/// expressions for all PostIncLoopSets must match, otherwise return nullptr. 4403constSCEV *Result =
nullptr;
4404for (
auto &L :
Loops) {
4408if (!New || (Result && New != Result))
4413assert(Result &&
"failed to create expression");
4417/// Generate reuse formulae from different IV types. 4418void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4419// Don't bother truncating symbolic values. 4420if (
Base.BaseGV)
return;
4422// Determine the integer type for the base formula. 4428// It is invalid to extend a pointer type so exit early if ScaledReg or 4429// any of the BaseRegs are pointers. 4430if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4433 [](
constSCEV *S) { return S->getType()->isPointerTy(); }))
4437for (
auto &LF : LU.Fixups)
4438Loops.push_back(LF.PostIncLoops);
4440for (
Type *SrcTy : Types) {
4444// Sometimes SCEV is able to prove zero during ext transform. It may 4445// happen if SCEV did not do all possible transforms while creating the 4446// initial node (maybe due to depth limitations), but it can do them while 4449constSCEV *NewScaledReg =
4451if (!NewScaledReg || NewScaledReg->
isZero())
4453F.ScaledReg = NewScaledReg;
4455bool HasZeroBaseReg =
false;
4456for (
constSCEV *&BaseReg :
F.BaseRegs) {
4457constSCEV *NewBaseReg =
4459if (!NewBaseReg || NewBaseReg->
isZero()) {
4460 HasZeroBaseReg =
true;
4463 BaseReg = NewBaseReg;
4468// TODO: This assumes we've done basic processing on all uses and 4469// have an idea what the register usage is. 4470if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4474 (void)InsertFormula(LU, LUIdx,
F);
4481/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer 4482/// modifications so that the search phase doesn't have to worry about the data 4483/// structures moving underneath it. 4490 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4496}
// end anonymous namespace 4498#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4500OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4501 <<
" , add offset " <<
Imm;
4509/// Look for registers which are a constant distance apart and try to form reuse 4510/// opportunities between them. 4511void LSRInstance::GenerateCrossUseConstantOffsets() {
4512// Group the registers by their value without any added constant offset. 4513usingImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4518for (
constSCEV *
Use : RegUses) {
4519constSCEV *
Reg =
Use;
// Make a copy for ExtractImmediate to modify. 4521auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4524 Pair.first->second.insert(std::make_pair(Imm,
Use));
4525 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4528// Now examine each set of registers with the same base value. Build up 4529// a list of work to do and do the work in a separate step so that we're 4530// not adding formulae and register counts while we're searching. 4534for (
constSCEV *Reg : Sequence) {
4535const ImmMapTy &Imms =
Map.find(Reg)->second;
4537// It's not worthwhile looking for reuse if there's only one offset. 4538if (Imms.size() == 1)
4541LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4542for (
constauto &Entry
4544 <<
' ' <<
Entry.first;
4547// Examine each offset. 4548for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4550constSCEV *OrigReg = J->second;
4552 Immediate JImm = J->first;
4553constSmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4555if (!isa<SCEVConstant>(OrigReg) &&
4556 UsedByIndicesMap[Reg].
count() == 1) {
4562// Conservatively examine offsets between this orig reg a few selected 4564 Immediate
First = Imms.begin()->first;
4565 Immediate
Last = std::prev(Imms.end())->first;
4566if (!
First.isCompatibleImmediate(
Last)) {
4571// Only scalable if both terms are scalable, or if one is scalable and 4573bool Scalable =
First.isScalable() ||
Last.isScalable();
4574 int64_t FI =
First.getKnownMinValue();
4575 int64_t LI =
Last.getKnownMinValue();
      // Compute (First + Last) / 2 without overflow using the fact that
      // First + Last = 2 * (First & Last) + (First ^ Last).
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4579// If the result is negative and FI is odd and LI even (or vice versa), 4580// we rounded towards -inf. Add 1 in that case, to round towards 0. 4581 Avg = Avg + ((FI ^ LI) & ((
uint64_t)Avg >> 63));
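      // Worked example: FI = 7, LI = 4 gives (7 & 4) + ((7 ^ 4) >> 1)
      // = 4 + 1 = 5 = (7 + 4) / 2, without ever forming FI + LI and so
      // without any risk of signed overflow.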
4582 ImmMapTy::const_iterator OtherImms[] = {
4583 Imms.begin(), std::prev(Imms.end()),
4584 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4585for (
constauto &M : OtherImms) {
4586if (M == J || M == JE)
continue;
4587if (!JImm.isCompatibleImmediate(
M->first))
4590// Compute the difference between the two. 4591 Immediate
Imm = JImm.subUnsigned(
M->first);
4592for (
unsigned LUIdx : UsedByIndices.
set_bits())
4593// Make a memo of this use, offset, and register tuple. 4594if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4602 UsedByIndicesMap.
clear();
4603 UniqueItems.
clear();
4605// Now iterate through the worklist and add new formulae. 4606for (
constWorkItem &WI : WorkItems) {
4607size_t LUIdx = WI.LUIdx;
4608 LSRUse &LU =
Uses[LUIdx];
4609 Immediate
Imm = WI.Imm;
4610constSCEV *OrigReg = WI.OrigReg;
4613constSCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4616// TODO: Use a more targeted data structure. 4617for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4618 Formula
F = LU.Formulae[
L];
4619// FIXME: The code for the scaled and unscaled registers looks 4620// very similar but slightly different. Investigate if they 4621// could be merged. That way, we would not have to unscale the 4624// Use the immediate in the scaled register. 4625if (
F.ScaledReg == OrigReg) {
4626if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4628 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4629// Don't create 50 + reg(-50). 4630constSCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4631if (
F.referencesReg(S))
4635if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4638 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4640// If the new scale is a constant in a register, and adding the constant 4641// value to the immediate would produce a value closer to zero than the 4642// immediate itself, then the formula isn't worthwhile. 4643if (
constSCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
4644// FIXME: Do we need to do something for scalable immediates here? 4645// A scalable SCEV won't be constant, but we might still have 4646// something in the offset? Bail out for now to be safe. 4647if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4649if (
C->getValue()->isNegative() !=
4650 (NewF.BaseOffset.isLessThanZero()) &&
4652 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4657 NewF.canonicalize(*this->L);
4658 (void)InsertFormula(LU, LUIdx, NewF);
4660// Use the immediate in a base register. 4661for (
size_tN = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4662constSCEV *BaseReg =
F.BaseRegs[
N];
4663if (BaseReg != OrigReg)
4666if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4667 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4668 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4670 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4672 LU.Kind, LU.AccessTy, NewF)) {
4676 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4680 NewF.UnfoldedOffset = NewUnfoldedOffset;
4682 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4684// If the new formula has a constant in a register, and adding the 4685// constant value to the immediate would produce a value closer to 4686// zero than the immediate itself, then the formula isn't worthwhile. 4687for (
constSCEV *NewReg : NewF.BaseRegs)
4689if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4691if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4693 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4694 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4696 (
unsigned)llvm::countr_zero<uint64_t>(
4697 NewF.BaseOffset.getFixedValue()))
4702 NewF.canonicalize(*this->L);
4703 (void)InsertFormula(LU, LUIdx, NewF);
4712/// Generate formulae for each use. 4714LSRInstance::GenerateAllReuseFormulae() {
4715// This is split into multiple loops so that hasRegsUsedByUsesOtherThan 4716// queries are more precise. 4717for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4718 LSRUse &LU =
Uses[LUIdx];
4719for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4720 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4721for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4722 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4724for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4725 LSRUse &LU =
Uses[LUIdx];
4726for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4727 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4728for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4729 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4730for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4731 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4732for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4733 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4735for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4736 LSRUse &LU =
Uses[LUIdx];
4737for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4738 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4741 GenerateCrossUseConstantOffsets();
4744"After generating reuse formulae:\n";
4745 print_uses(
dbgs()));
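// Illustrative sketch (not part of LSR itself; the exact output depends on the
// target and on TTI): for a single address use whose SCEV is {A,+,4}<%L>, the
// generators above might produce formulae along these lines:
//   reg({A,+,4})                      -- the initial formula
//   reg(A) + 4*reg({0,+,1})           -- reassociation / GenerateScales
//   reg(A-4) + 4*reg({1,+,1})         -- GenerateConstantOffsets
// GenerateCrossUseConstantOffsets then tries to share registers between uses
// whose expressions differ only by a constant.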
4748/// If there are multiple formulae with the same set of registers used 4749/// by other uses, pick the best one and delete the others. 4750void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4755bool ChangedFormulae =
false;
4758// Collect the best formula for each unique set of shared registers. This 4759// is reset for each use. 4760usingBestFormulaeTy =
4763 BestFormulaeTy BestFormulae;
4765for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4766 LSRUse &LU =
Uses[LUIdx];
4771for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4772 FIdx != NumForms; ++FIdx) {
4773 Formula &
F = LU.Formulae[FIdx];
4775// Some formulas are instant losers. For example, they may depend on 4776// nonexistent AddRecs from other loops. These need to be filtered 4777// immediately, otherwise heuristics could choose them over others leading 4778// to an unsatisfactory solution. Passing LoserRegs into RateFormula here 4779// avoids the need to recompute this information across formulae using the 4780// same bad AddRec. Passing LoserRegs is also essential unless we remove 4781// the corresponding bad register from the Regs set. 4784 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4785if (CostF.isLoser()) {
4786// During initial formula generation, undesirable formulae are generated 4787// by uses within other loops that have some non-trivial address mode or 4788// use the postinc form of the IV. LSR needs to provide these formulae 4789// as the basis of rediscovering the desired formula that uses an AddRec 4790// corresponding to the existing phi. Once all formulae have been 4791// generated, these initial losers may be pruned. 4797for (
constSCEV *Reg :
F.BaseRegs) {
4798if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4802 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4803Key.push_back(
F.ScaledReg);
4804// Unstable sort by host order ok, because this is only used for 4808 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4809 BestFormulae.insert(std::make_pair(Key, FIdx));
4813 Formula &Best = LU.Formulae[
P.first->second];
4817 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4818if (CostF.isLess(CostBest))
4822" in favor of formula ";
4826 ChangedFormulae =
true;
4828 LU.DeleteFormula(
F);
4834// Now that we've filtered out some formulae, recompute the Regs set. 4836 LU.RecomputeRegs(LUIdx, RegUses);
4838// Reset this to prepare for the next use. 4839 BestFormulae.clear();
4844"After filtering out undesirable candidates:\n";
4849/// Estimate the worst-case number of solutions the solver might have to 4850/// consider. It almost never considers this many solutions because it prunes the 4851/// search space, but the pruning isn't always sufficient. 4852size_t LSRInstance::EstimateSearchSpaceComplexity()
const{
4854for (
const LSRUse &LU :
Uses) {
4855size_t FSize = LU.Formulae.size();
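// A minimal sketch of the estimate computed above, assuming hypothetical
// per-use formula counts (illustrative only): the worst case is the product of
// the counts, saturated so it cannot overflow. For counts {3, 4, 2} the
// estimate would be 3 * 4 * 2 = 24 candidate solutions.
//
//   // uint64_t estimateWorstCase(const SmallVectorImpl<size_t> &Counts) {
//   //   uint64_t Product = 1;
//   //   for (size_t N : Counts) {
//   //     if (N != 0 && Product > UINT32_MAX / N)
//   //       return UINT32_MAX;          // saturate rather than overflow
//   //     Product *= N;
//   //   }
//   //   return Product;
//   // }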
4867/// When one formula uses a superset of the registers of another formula, it 4868/// won't help reduce register pressure (though it may not necessarily hurt 4869/// register pressure); remove it to simplify the system. 4870void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4874LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae " 4875"which use a superset of registers used by other " 4878for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4879 LSRUse &LU =
Uses[LUIdx];
4881for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4882 Formula &
F = LU.Formulae[i];
4883if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4885// Look for a formula with a constant or GV in a register. If the use 4886// also has a formula with that same value in an immediate field, 4887// delete the one that uses a register. 4889I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4892//FIXME: Formulas should store bitwidth to do wrapping properly. 4895 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4896 (
uint64_t)
C->getValue()->getSExtValue());
4897 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4898 (
I -
F.BaseRegs.begin()));
4899if (LU.HasFormulaWithSameRegs(NewF)) {
4902 LU.DeleteFormula(
F);
4908 }
elseif (
constSCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4909if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4913 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4914 (
I -
F.BaseRegs.begin()));
4915if (LU.HasFormulaWithSameRegs(NewF)) {
4918 LU.DeleteFormula(
F);
4929 LU.RecomputeRegs(LUIdx, RegUses);
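// Worked example for the superset elimination above (illustrative only): if a
// use holds both
//   reg(A) + reg(5)              -- the constant 5 kept in its own register
//   reg(A) with BaseOffset = 5   -- the same value folded into the immediate
// then the first formula uses a strict superset of the second one's registers,
// so it is deleted, provided the immediate form is legal for this use.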
4936/// When there are many registers for expressions like A, A+1, A+2, etc., 4937/// allocate a single register for them. 4938void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4943dbgs() <<
"The search space is too complex.\n" 4944"Narrowing the search space by assuming that uses separated " 4945"by a constant offset will use the same registers.\n");
4947// This is especially useful for unrolled loops. 4949for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4950 LSRUse &LU =
Uses[LUIdx];
4951for (
const Formula &
F : LU.Formulae) {
4952if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4955 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4959if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
/*HasBaseReg=*/false,
4960 LU.Kind, LU.AccessTy))
4965 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4967// Transfer the fixups of LU to LUThatHas. 4968for (LSRFixup &
Fixup : LU.Fixups) {
4969Fixup.Offset +=
F.BaseOffset;
4970 LUThatHas->pushFixup(
Fixup);
4974// Delete formulae from the new use which are no longer legal. 4976for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4977 Formula &
F = LUThatHas->Formulae[i];
4978if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4979 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4981 LUThatHas->DeleteFormula(
F);
4989 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4991// Delete the old use. 4992 DeleteUse(LU, LUIdx);
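// Worked example for the collapsing above (illustrative only): after unrolling
// by four, the loads at A, A+4, A+8 and A+12 become four separate uses. Rather
// than keeping a register per address, the uses are folded into the one whose
// formula has a matching base, and the constant differences are carried in the
// fixup offsets, so a single register (plus small immediates) serves all four.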
5002/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that 5003/// we've done more filtering, as it may be able to find more formulae to eliminate. 5005void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5009LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out " 5010"undesirable dedicated registers.\n");
5012 FilterOutUndesirableDedicatedRegisters();
5018/// If an LSRUse has multiple formulae with the same ScaledReg and Scale, 5019/// pick the best one and delete the others. 5020/// This narrowing heuristic keeps as many formulae with different 5021/// Scale and ScaledReg pairs as possible while narrowing the search space. 5022/// The benefit is that it is more likely to find a better solution 5023/// in a formulae set with more Scale and ScaledReg variations than 5024/// in one where they are all the same. The winner-reg-picking 5025/// heuristic will often keep the formulae with the same Scale and 5026/// ScaledReg and filter out the others, and we want to avoid that if possible. 5027void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5032dbgs() <<
"The search space is too complex.\n" 5033"Narrowing the search space by choosing the best Formula " 5034"from the Formulae with the same Scale and ScaledReg.\n");
5036// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. 5039 BestFormulaeTy BestFormulae;
5041bool ChangedFormulae =
false;
5046for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5047 LSRUse &LU =
Uses[LUIdx];
5051// Return true if Formula FA is better than Formula FB. 5052auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5053// First we will try to choose the Formula with fewer new registers. 5054// For a register used by current Formula, the more the register is 5055// shared among LSRUses, the less we increase the register number 5056// counter of the formula. 5058for (
constSCEV *Reg : FA.BaseRegs) {
5059constSmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5060 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5063for (
constSCEV *Reg : FB.BaseRegs) {
5064constSmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5065 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5067if (FARegNum != FBRegNum)
5068return FARegNum < FBRegNum;
5070// If the new register numbers are the same, choose the Formula with 5075 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
5077 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
5078return CostFA.isLess(CostFB);
5082for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5084 Formula &
F = LU.Formulae[FIdx];
5087autoP = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5091 Formula &Best = LU.Formulae[
P.first->second];
5092if (IsBetterThan(
F, Best))
5096" in favor of formula ";
5099 ChangedFormulae =
true;
5101 LU.DeleteFormula(
F);
5107 LU.RecomputeRegs(LUIdx, RegUses);
5109// Reset this to prepare for the next use. 5110 BestFormulae.clear();
5115"After filtering out undesirable candidates:\n";
5120/// If we are over the complexity limit, filter out any post-inc preferring 5121/// variables to use only post-inc values. 5122void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5129"Narrowing the search space by choosing the lowest " 5130"register Formula for PostInc Uses.\n");
5132for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5133 LSRUse &LU =
Uses[LUIdx];
5135if (LU.Kind != LSRUse::Address)
5141size_t MinRegs = std::numeric_limits<size_t>::max();
5142for (
const Formula &
F : LU.Formulae)
5143 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5146for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5148 Formula &
F = LU.Formulae[FIdx];
5149if (
F.getNumRegs() > MinRegs) {
5152 LU.DeleteFormula(
F);
5159 LU.RecomputeRegs(LUIdx, RegUses);
5168/// This function deletes formulae with a high expected register count. 5169/// Assuming we don't know the value of each formula (we have already deleted 5170/// all inefficient ones), compute the probability of each register not being selected 5174/// reg(a) + reg({0,+,1}) 5175/// reg(a) + reg({-1,+,1}) + 1 5178/// reg(b) + reg({0,+,1}) 5179/// reg(b) + reg({-1,+,1}) + 1 5182/// reg(c) + reg(b) + reg({0,+,1}) 5183/// reg(c) + reg({b,+,1}) 5185/// Probability of not selecting 5187/// reg(a) (1/3) * 1 * 1 5188/// reg(b) 1 * (1/3) * (1/2) 5189/// reg({0,+,1}) (2/3) * (2/3) * (1/2) 5190/// reg({-1,+,1}) (2/3) * (2/3) * 1 5191/// reg({a,+,1}) (2/3) * 1 * 1 5192/// reg({b,+,1}) 1 * (2/3) * (2/3) 5195/// Now compute the expected register count for each formula. 5196/// Note that for each use we exclude the probability of not selecting for that use. 5197/// For example, for Use1 the probability for reg(a) would be just 1 * 1 (excluding 5198/// the probability 1/3 of not selecting for Use1). 5200/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted 5201/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted 5204/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted 5205/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted 5208/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted 5209/// reg(c) + reg({b,+,1}) 1 + 2/3 5210void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5213// Ok, we have too many formulae on our hands to conveniently handle. 5214// Use a rough heuristic to thin out the list. 5216// Set of Regs which will be 100% used in the final solution. 5217// Used in each formula of a solution (in the example above this is reg(c)). 5218// We can skip them in calculations. 5222// Map each register to the probability of it not being selected. 5223 DenseMap<const SCEV *, float> RegNumMap;
5224for (
constSCEV *Reg : RegUses) {
5225if (UniqRegs.
count(Reg))
5228for (
const LSRUse &LU :
Uses) {
5229if (!LU.Regs.count(Reg))
5231floatP = LU.getNotSelectedProbability(Reg);
5237 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
5241dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5243// Delete formulas where registers number expectation is high. 5244for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5245 LSRUse &LU =
Uses[LUIdx];
5246// If nothing to delete - continue. 5247if (LU.Formulae.size() < 2)
5249// This is a temporary solution to test performance. Float should be 5250// replaced with a rounding-independent type (based on integers) to avoid 5251// different results for different target builds. 5252float FMinRegNum = LU.Formulae[0].getNumRegs();
5253float FMinARegNum = LU.Formulae[0].getNumRegs();
5255for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5256 Formula &
F = LU.Formulae[i];
5259for (
constSCEV *BaseReg :
F.BaseRegs) {
5260if (UniqRegs.
count(BaseReg))
5262 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5263if (isa<SCEVAddRecExpr>(BaseReg))
5265 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5267if (
constSCEV *ScaledReg =
F.ScaledReg) {
5268if (!UniqRegs.
count(ScaledReg)) {
5270 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5271if (isa<SCEVAddRecExpr>(ScaledReg))
5273 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5276if (FMinRegNum > FRegNum ||
5277 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5278 FMinRegNum = FRegNum;
5279 FMinARegNum = FARegNum;
5284dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5286std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5287while (LU.Formulae.size() != 1) {
5290 LU.Formulae.pop_back();
5292 LU.RecomputeRegs(LUIdx, RegUses);
5293assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5294 Formula &
F = LU.Formulae[0];
5296// When we choose the formula, the regs become unique. 5297 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
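// Standalone sketch of the expectation arithmetic used above, with hypothetical
// names and plain floats instead of LSR's data structures (illustrative only):
//
//   // NotSel[R][U]: probability that register R is NOT selected for use U
//   // (taken as 1.0 when use U does not reference R at all). Then
//   //   PNotSel(R) = product over all uses V of NotSel[R][V]
//   //   E(R, U)    = PNotSel(R) / NotSel[R][U]
//   // and a formula's expected register count is the sum of E(R, U) over its
//   // registers. Per use, only the formula with the smallest expectation is
//   // kept; the registers of that formula then become "unique" and are skipped
//   // in later iterations.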
5304// Check if Best and Reg are SCEVs separated by a constant amount C, and if so 5305// whether the addressing offset +C would be legal where the negative offset -C is not. 5310 MemAccessTy AccessType) {
5311if (Best->
getType() != Reg->getType() ||
5312 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5313 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5314 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5321 AccessType.MemTy,
/*BaseGV=*/nullptr,
5322/*BaseOffset=*/Diff->getSExtValue(),
5323/*HasBaseReg=*/true,
/*Scale=*/0, AccessType.AddrSpace) &&
5325 AccessType.MemTy,
/*BaseGV=*/nullptr,
5326/*BaseOffset=*/-Diff->getSExtValue(),
5327/*HasBaseReg=*/true,
/*Scale=*/0, AccessType.AddrSpace);
5330/// Pick a register which seems likely to be profitable, and then in any use 5331/// which has any reference to that register, delete all formulae which do not 5332/// reference that register. 5333void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5334// With all other options exhausted, loop until the system is simple enough to handle. 5338// Ok, we have too many formulae on our hands to conveniently handle. 5339// Use a rough heuristic to thin out the list. 5342// Pick the register which is used by the most LSRUses, which is likely 5343// to be a good reuse register candidate. 5344const SCEV *Best =
nullptr;
5345unsigned BestNum = 0;
5346for (
constSCEV *Reg : RegUses) {
5347if (Taken.
count(Reg))
5351 BestNum = RegUses.getUsedByIndices(Reg).count();
5353unsigned Count = RegUses.getUsedByIndices(Reg).count();
5354if (Count > BestNum) {
5359// If the scores are the same, but the Reg is simpler for the target 5360// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can 5361// handle +C but not -C), opt for the simpler formula. 5362if (Count == BestNum) {
5363int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5364if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5366Uses[LUIdx].AccessTy)) {
5373assert(Best &&
"Failed to find best LSRUse candidate");
5375LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5376 <<
" will yield profitable reuse.\n");
5379// In any use with formulae which references this register, delete formulae 5380// which don't reference it. 5381for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5382 LSRUse &LU =
Uses[LUIdx];
5383if (!LU.Regs.count(Best))
continue;
5386for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5387 Formula &
F = LU.Formulae[i];
5388if (!
F.referencesReg(Best)) {
5390 LU.DeleteFormula(
F);
5394assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5400 LU.RecomputeRegs(LUIdx, RegUses);
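// Worked example for the winner-register heuristic above (illustrative only):
// if reg(A) is referenced by 7 of 10 uses and no other register is referenced
// by more, reg(A) is taken as the winner; in each of those 7 uses every
// formula that does not reference reg(A) is deleted, and the process repeats
// until the search space estimate drops below the complexity limit.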
5407/// If there are an extraordinary number of formulae to choose from, use some 5408/// rough heuristics to prune down the number of formulae. This keeps the main 5409/// solver from taking an extraordinary amount of time in some worst-case scenarios. 5411void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5412 NarrowSearchSpaceByDetectingSupersets();
5413 NarrowSearchSpaceByCollapsingUnrolledCode();
5414 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5416 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5417 NarrowSearchSpaceByFilterPostInc();
5419 NarrowSearchSpaceByDeletingCostlyFormulas();
5421 NarrowSearchSpaceByPickingWinnerRegs();
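// With the search space narrowed, the recursive solver below performs a
// depth-first branch-and-bound over one formula choice per use. A minimal
// sketch of that shape (illustrative only; the names and types are
// hypothetical, not LSR's actual ones):
//
//   // void Solve(unsigned UseIdx, Cost Cur, State &S) {
//   //   if (UseIdx == NumUses) { S.recordIfBetter(Cur); return; }
//   //   for (const Formula &F : Formulae[UseIdx]) {
//   //     if (!usesAllRequiredRegs(F, S))   // prune: must reuse in-progress regs
//   //       continue;
//   //     Cost Next = Cur + rate(F, S);
//   //     if (Next < S.BestCost)            // prune: already worse than best
//   //       Solve(UseIdx + 1, Next, S);
//   //   }
//   // }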
5424/// This is the recursive solver. 5433// - use more aggressive filtering 5434// - sort the formula so that the most profitable solutions are found first 5435// - sort the uses too 5437// - don't compute a cost, and then compare. compare while computing a cost 5439// - track register sets with SmallBitVector 5441const LSRUse &LU =
Uses[Workspace.
size()];
5443// If this use references any register that's already a part of the 5444// in-progress solution, consider it a requirement that a formula must 5445// reference that register in order to be considered. This prunes out 5446// unprofitable searching. 5448for (
constSCEV *S : CurRegs)
5449if (LU.Regs.count(S))
5454for (
const Formula &
F : LU.Formulae) {
5455// Ignore formulae which may not be ideal in terms of register reuse of 5456// ReqRegs. The formula should use all required registers before 5457// introducing new ones. 5458// This can sometimes (notably when trying to favour postinc) lead to 5459// sub-optimal decisions. There it is best left to the cost modelling to undo this. 5462int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5463for (
constSCEV *Reg : ReqRegs) {
5464if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5467if (NumReqRegsToFind == 0)
5471if (NumReqRegsToFind != 0) {
5472// If none of the formulae satisfied the required registers, then we could 5473// clear ReqRegs and try again. Currently, we simply give up in this case. 5478// Evaluate the cost of the current formula. If it's already worse than 5479// the current best, prune the search at that point. 5482 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5483if (NewCost.isLess(SolutionCost)) {
5485if (Workspace.
size() !=
Uses.size()) {
5486 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5487 NewRegs, VisitedRegs);
5488if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5489 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5492dbgs() <<
".\nRegs:\n";
5494 <<
"- " << *S <<
"\n";
5497 SolutionCost = NewCost;
5498 Solution = Workspace;
5505/// Choose one formula from each use. Return the results in the given Solution 5509Cost SolutionCost(L, SE,
TTI, AMK);
5510 SolutionCost.Lose();
5516// SolveRecurse does all the work. 5517 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5518 CurRegs, VisitedRegs);
5519if (Solution.
empty()) {
5524// Ok, we've now made all our decisions. 5526"The chosen solution requires ";
5527 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5528for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5533 Solution[i]->print(
dbgs());
5539constbool EnableDropUnprofitableSolution = [&] {
5551if (BaselineCost.isLess(SolutionCost)) {
5552if (!EnableDropUnprofitableSolution)
5554dbgs() <<
"Baseline is more profitable than chosen solution, " 5555"add option 'lsr-drop-solution' to drop LSR solution.\n");
5558"solution, dropping LSR solution.\n";);
5564/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far as 5565/// we can go while still being dominated by the input positions. This helps 5566/// canonicalize the insert position, which encourages sharing. 5573bool AllDominate =
true;
5575// Don't bother attempting to insert before a catchswitch, their basic block 5576// cannot have other non-PHI instructions. 5577if (isa<CatchSwitchInst>(Tentative))
5581if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5585// Attempt to find an insert position in the middle of the block, 5586// instead of at the end, so that it can be used for other expansions. 5587if (Tentative->
getParent() == Inst->getParent() &&
5588 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5598constLoop *IPLoop = LI.getLoopFor(IP->getParent());
5599unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5604 Rung = Rung->getIDom();
5606 IDom = Rung->getBlock();
5608// Don't climb into a loop though. 5609constLoop *IDomLoop = LI.getLoopFor(IDom);
5610unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5611if (IDomDepth <= IPLoopDepth &&
5612 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5622/// Determine an input position which will be dominated by the operands and 5623/// which will dominate the result. 5626// Collect some instructions which must be dominated by the 5627// expanding replacement. These must be dominated by any operands that 5628// will be required in the expansion. 5630if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5632if (LU.Kind == LSRUse::ICmpZero)
5634 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5636if (LF.PostIncLoops.count(L)) {
5637if (LF.isUseFullyOutsideLoop(L))
5638 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5642// The expansion must also be dominated by the increment positions of any 5643// loops for which it is using post-inc mode. 5644for (
constLoop *PIL : LF.PostIncLoops) {
5645if (PIL == L)
continue;
5647// Be dominated by the loop exit. 5650if (!ExitingBlocks.
empty()) {
5652for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5658assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5659 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5660"Insertion point must be a normal instruction");
5662// Then, climb up the immediate dominator tree as far as we can go while 5663// still being dominated by the input positions. 5666// Don't insert instructions before PHI nodes. 5667while (isa<PHINode>(IP)) ++IP;
5669// Ignore landingpad instructions. 5670while (IP->isEHPad()) ++IP;
5672// Ignore debug intrinsics. 5673while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5675// Set IP below instructions recently inserted by SCEVExpander. This keeps the 5676// IP consistent across expansions and allows the previously inserted 5677// instructions to be reused by subsequent expansion. 5678while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5684/// Emit instructions for the leading candidate expression for this LSRUse (this 5685/// is called "expanding"). 5686Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5690return LF.OperandValToReplace;
5692// Determine an input position which will be dominated by the operands and 5693// which will dominate the result. 5694 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5697// Inform the Rewriter if we have a post-increment use, so that it can 5698// perform an advantageous expansion. 5699Rewriter.setPostInc(LF.PostIncLoops);
5701// This is the type that the user actually needs. 5702Type *OpTy = LF.OperandValToReplace->getType();
5703// This will be the type that we'll initially expand to. 5704Type *Ty =
F.getType();
5706// No type known; just expand directly to the ultimate type. 5709// Expand directly to the ultimate type if it's the right size. 5711// This is the type to do integer arithmetic in. 5714// Build up a list of operands to add together to form the full base. 5717// Expand the BaseRegs portion. 5718for (
constSCEV *Reg :
F.BaseRegs) {
5719assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5721// If we're expanding for a post-inc user, make the post-inc adjustment. 5726// Expand the ScaledReg portion. 5727Value *ICmpScaledV =
nullptr;
5729constSCEV *ScaledS =
F.ScaledReg;
5731// If we're expanding for a post-inc user, make the post-inc adjustment. 5735if (LU.Kind == LSRUse::ICmpZero) {
5736// Expand ScaleReg as if it was part of the base regs. 5741// An interesting way of "folding" with an icmp is to use a negated 5742// scale, which we'll implement by inserting it into the other operand 5745"The only scale supported by ICmpZero uses is -1!");
5746 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5749// Otherwise just expand the scaled register and an explicit scale, 5750// which is expected to be matched as part of the address. 5752// Flush the operand list to suppress SCEVExpander hoisting address modes. 5753// Unless the addressing mode will not be folded. 5754if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5768// Expand the GV portion. 5770// Flush the operand list to suppress SCEVExpander hoisting. 5779// Flush the operand list to suppress SCEVExpander hoisting of both folded and 5780// unfolded offsets. LSR assumes they both live next to their uses. 5787// FIXME: Are we sure we won't get a mismatch here? Is there a way to bail 5788// out at this point, or should we generate a SCEV adding together mixed 5790assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5791"Expanding mismatched offsets\n");
5792// Expand the immediate portion. 5793 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5795if (LU.Kind == LSRUse::ICmpZero) {
5796// The other interesting way of "folding" with an ICmpZero is to use a 5797// negated immediate. 5803 ICmpScaledV = ConstantInt::get(IntTy,
Offset.getFixedValue());
5806// Just add the immediate values. These again are expected to be matched 5807// as part of the address. 5812// Expand the unfolded offset portion. 5813 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5814if (UnfoldedOffset.isNonZero()) {
5815// Just add the immediate values. 5816 Ops.
push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5819// Emit instructions summing all the operands. 5825// We're done expanding now, so reset the rewriter. 5828// An ICmpZero Formula represents an ICmp which we're handling as a 5829// comparison against zero. Now that we've expanded an expression for that 5830// form, update the ICmp's other operand. 5831if (LU.Kind == LSRUse::ICmpZero) {
5832ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5833if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5835assert(!
F.BaseGV &&
"ICmp does not support folding a global value and " 5836"a scale at the same time!");
5838if (ICmpScaledV->
getType() != OpTy) {
5846// A scale of 1 means that the scale has been expanded as part of the 5848assert((
F.Scale == 0 ||
F.Scale == 1) &&
5849"ICmp does not support folding a global value and " 5850"a scale at the same time!");
5853if (
C->getType() != OpTy) {
5857assert(
C &&
"Cast of ConstantInt should have folded");
5867/// Helper for Rewrite. PHI nodes are special because the use of their operands 5868/// effectively happens in their predecessor blocks, so the expression may need 5869/// to be expanded in multiple places. 5870void LSRInstance::RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
5871const LSRFixup &LF,
const Formula &
F,
5877bool needUpdateFixups =
false;
5880// If this is a critical edge, split the edge so that we do not insert 5881// the code on all predecessor/successor paths. We do this unless this 5882// is the canonical backedge for this loop, which complicates post-inc 5888Loop *PNLoop = LI.getLoopFor(Parent);
5889if (!PNLoop || Parent != PNLoop->
getHeader()) {
5890// Split the critical edge. 5896 .setMergeIdenticalEdges()
5897 .setKeepOneInputPHIs());
5904// If NewBB==NULL, then SplitCriticalEdge refused to split because all 5905// phi predecessors are identical. The simple thing to do is skip 5906// splitting in this case rather than complicate the API. 5908// If PN is outside of the loop and BB is in the loop, we want to 5909// move the block to be immediately before the PHI block, not 5910// immediately after BB. 5911if (
L->contains(BB) && !
L->contains(PN))
5914// Splitting the edge can reduce the number of PHI entries we have. 5919 needUpdateFixups =
true;
5924 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5925Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5932// If this is reuse-by-noop-cast, insert the noop cast. 5933Type *OpTy = LF.OperandValToReplace->getType();
5937 LF.OperandValToReplace->getType(),
"tmp",
5940// If the incoming block for this value is not in the loop, it means the 5941// current PHI is not in a loop exit, so we must create a LCSSA PHI for 5942// the inserted value. 5943if (
auto *
I = dyn_cast<Instruction>(FullV))
5944if (
L->contains(
I) && !
L->contains(BB))
5945 InsertedNonLCSSAInsts.insert(
I);
5948 Pair.first->second = FullV;
5951// If LSR splits critical edge and phi node has other pending 5952// fixup operands, we need to update those pending fixups. Otherwise 5953// formulae will not be implemented completely and some instructions 5954// will not be eliminated. 5955if (needUpdateFixups) {
5956for (LSRUse &LU :
Uses)
5957for (LSRFixup &
Fixup : LU.Fixups)
5958// If fixup is supposed to rewrite some operand in the phi 5959// that was just updated, it may be already moved to 5960// another phi node. Such fixup requires update. 5961if (
Fixup.UserInst == PN) {
5962// Check if the operand we try to replace still exists in the 5964bool foundInOriginalPHI =
false;
5966if (val ==
Fixup.OperandValToReplace) {
5967 foundInOriginalPHI =
true;
5971// If fixup operand found in original PHI - nothing to do. 5972if (foundInOriginalPHI)
5975// Otherwise it might be moved to another PHI and requires update. 5976// If fixup operand not found in any of the incoming blocks that 5977// means we have already rewritten it - nothing to do. 5983if (val ==
Fixup.OperandValToReplace)
5984Fixup.UserInst = NewPN;
5991/// Emit instructions for the leading candidate expression for this LSRUse (this 5992/// is called "expanding"), and update the UserInst to reference the newly expanded value. 5994void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
5997// First, find an insertion point that dominates UserInst. For PHI nodes, 5998// find the nearest block which dominates all the relevant uses. 5999if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
6000 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6002Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
6004// If this is reuse-by-noop-cast, insert the noop cast. 6005Type *OpTy = LF.OperandValToReplace->getType();
6006if (FullV->
getType() != OpTy) {
6009 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
6013// Update the user. ICmpZero is handled specially here (for now) because 6014// Expand may have updated one of the operands of the icmp already, and 6015// its new value may happen to be equal to LF.OperandValToReplace, in 6016// which case doing replaceUsesOfWith leads to replacing both operands 6017// with the same value. TODO: Reorganize this. 6018if (LU.Kind == LSRUse::ICmpZero)
6021 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
6024if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
6028// Try to hoist the IVInc to the loop header if all IVInc users are in 6029// the loop header. This helps the backend generate post-index loads/stores 6030// when the latch block is different from the loop header block. 6034if (LU.Kind != LSRUse::Address)
6037// For now this code does the conservative optimization and only works for 6038// the header block. Later we can hoist the IVInc to the block that 6039// post-dominates all users. 6041if (IVIncInsertPos->
getParent() == LHeader)
6044if (!
Fixup.OperandValToReplace ||
6046 Instruction *UI = cast<Instruction>(U);
6047 return UI->getParent() != LHeader;
6052Type *Ty =
I->getType();
6058/// Rewrite all the fixup locations with new values, following the chosen 6060void LSRInstance::ImplementSolution(
6062// Keep track of instructions we may have made dead, so that 6063// we can remove them after we are done working. 6066// Mark phi nodes that terminate chains so the expander tries to reuse them. 6067for (
const IVChain &Chain : IVChainVec) {
6068if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6072// Expand the new value definitions and update the users. 6073for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6074for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6077 ?
L->getHeader()->getTerminator()
6079Rewriter.setIVIncInsertPos(L, InsertPos);
6080 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6084auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6087for (
const IVChain &Chain : IVChainVec) {
6088 GenerateIVChain(Chain, DeadInsts);
6094 ScalarEvolutionIVs.push_back(
IV);
6096// Clean up after ourselves. This must be done before deleting any 6103// In our cost analysis above, we assume that each addrec consumes exactly 6104// one register, and arrange to have increments inserted just before the 6105// latch to maximize the chance this is true. However, if we reused 6106// existing IVs, we now need to move the increments to match our 6107// expectations. Otherwise, our cost modeling results in us having 6108// chosen a non-optimal result for the actual schedule. (And yes, this 6109// scheduling decision does impact later codegen.) 6110for (
PHINode &PN :
L->getHeader()->phis()) {
6112Value *Start =
nullptr, *Step =
nullptr;
6117case Instruction::Sub:
6119// sub is non-commutative - match handling elsewhere in LSR 6122case Instruction::Add:
6128if (!isa<Constant>(Step))
6129// If not a constant step, might increase register pressure 6130// (We assume constants have been canonicalized to RHS) 6133if (BO->
getParent() == IVIncInsertPos->getParent())
6134// Only bother moving across blocks. Isel can handle block local case. 6137// Can we legally schedule inc at the desired point? 6139 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6141 BO->
moveBefore(IVIncInsertPos->getIterator());
6152 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6155 :
TTI.getPreferredAddressingMode(
L, &SE)),
6157 BaselineCost(
L, SE,
TTI, AMK) {
6158// If LoopSimplify form is not available, stay out of trouble. 6159if (!
L->isLoopSimplifyForm())
6162// If there's no interesting work to be done, bail early. 6163if (IU.
empty())
return;
6165// If there's too much analysis to be done, bail early. We won't be able to 6166// model the problem anyway. 6167unsigned NumUsers = 0;
6171LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6175// Bail out if we have a PHI on an EHPad that gets a value from a 6176// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is 6177// no good place to stick any instructions. 6178if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
6179auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6180if (isa<FuncletPadInst>(FirstNonPHI) ||
6181 isa<CatchSwitchInst>(FirstNonPHI))
6183if (isa<CatchSwitchInst>(PredBB->getFirstNonPHIIt()))
6189L->getHeader()->printAsOperand(
dbgs(),
/*PrintType=*/false);
6192// Configure SCEVExpander already now, so the correct mode is used for 6193// isSafeToExpand() checks. 6194#if LLVM_ENABLE_ABI_BREAKING_CHECKS 6200// First, perform some low-level loop optimizations. 6202 OptimizeLoopTermCond();
6204// If loop preparation eliminates all interesting IV users, bail. 6205if (IU.empty())
return;
6207// Skip nested loops until we can model them better with formulae. 6208if (!
L->isInnermost()) {
6213// Start collecting data and preparing for the solver. 6214// If the number of registers is not the major cost, we cannot benefit from the 6215// current profitable chain optimization, which is based on the number of 6217// FIXME: add profitable chain optimization for other kinds of major cost, for 6218// example the number of instructions. 6221 CollectInterestingTypesAndFactors();
6222 CollectFixupsAndInitialFormulae();
6223 CollectLoopInvariantFixupsAndFormulae();
6229 print_uses(
dbgs()));
6231 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6233// Now use the reuse data to generate a bunch of interesting ways 6234// to formulate the values needed for the uses. 6235 GenerateAllReuseFormulae();
6237 FilterOutUndesirableDedicatedRegisters();
6238 NarrowSearchSpaceUsingHeuristics();
6243// Release memory that is no longer needed. 6248if (Solution.
empty())
6252// Formulae should be legal. 6253for (
const LSRUse &LU :
Uses) {
6254for (
const Formula &
F : LU.Formulae)
6256F) &&
"Illegal formula generated!");
6260// Now that we've decided what we want, make it so. 6261 ImplementSolution(Solution);
6264#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 6265void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const{
6266if (Factors.empty() &&
Types.empty())
return;
6268OS <<
"LSR has identified the following interesting factors and types: ";
6271for (int64_t Factor : Factors) {
6277for (
Type *Ty : Types) {
6280OS <<
'(' << *Ty <<
')';
6286OS <<
"LSR is examining the following fixup sites:\n";
6287for (
const LSRUse &LU :
Uses)
6288for (
const LSRFixup &LF : LU.Fixups) {
6296OS <<
"LSR is examining the following uses:\n";
6297for (
const LSRUse &LU :
Uses) {
6301for (
const Formula &
F : LU.Formulae) {
6310 print_factors_and_types(
OS);
6322classLoopStrengthReduce :
publicLoopPass {
6324staticcharID;
// Pass ID, replacement for typeid 6326 LoopStrengthReduce();
6333}
// end anonymous namespace 6335LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6339void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const{
6340// We split critical edges, so we change the CFG. However, we do update 6341// many analyses if they are around. 6353// Requiring LoopSimplify a second time here prevents IVUsers from running 6354// twice, since LoopSimplify was invalidated by running ScalarEvolution. 6364/// Enables more convenient iteration over a DWARF expression vector. 6374structSCEVDbgValueBuilder {
6375 SCEVDbgValueBuilder() =
default;
6376 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6378void clone(
const SCEVDbgValueBuilder &
Base) {
6379 LocationOps =
Base.LocationOps;
6384 LocationOps.clear();
6388 /// The DIExpression as we translate the SCEV. 6390 /// The location ops of the DIExpression. 6396 /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value 6397 /// in the set of values referenced by the expression. 6401unsigned ArgIndex = 0;
6402if (It != LocationOps.
end()) {
6403 ArgIndex = std::distance(LocationOps.
begin(), It);
6405 ArgIndex = LocationOps.
size();
6417if (
C->getAPInt().getSignificantBits() > 64)
6419 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6420 Expr.
push_back(
C->getAPInt().getSExtValue());
6424// Iterating the expression as DWARF ops is convenient when updating 6425// DWARF_OP_LLVM_args. 6427return ToDwarfOpIter(Expr);
6430 /// Several SCEV types are sequences of the same arithmetic operator applied 6431 /// to constants and values that may be extended or truncated. 6434assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6435"Expected arithmetic SCEV type");
6437unsigned EmitOperator = 0;
6441if (EmitOperator >= 1)
6442 pushOperator(DwarfOp);
6448// TODO: Identify and omit noop casts. 6455 IsSigned ? llvm::dwarf::DW_ATE_signed
6456 : llvm::dwarf::DW_ATE_unsigned};
6457for (
constauto &
Op : CastOps)
6462// TODO: MinMax - although these haven't been encountered in the test suite. 6465if (
constSCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6466Success &= pushConst(StartInt);
6468 }
elseif (
constSCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6471 pushLocation(
U->getValue());
6473 }
elseif (
constSCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6474Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6476 }
elseif (
constSCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6477Success &= pushSCEV(UDiv->getLHS());
6478Success &= pushSCEV(UDiv->getRHS());
6479 pushOperator(llvm::dwarf::DW_OP_div);
6481 }
elseif (
constSCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6482// Assert if a new and unknown SCEVCastExpr type is encountered. 6483assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6484 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6485"Unexpected cast type in SCEV.");
6486Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6488 }
elseif (
constSCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6489Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6491 }
elseif (isa<SCEVAddRecExpr>(S)) {
6492// Nested SCEVAddRecExpr are generated by nested loops and are currently 6502 /// Return true if the combination of arithmetic operator and underlying 6503 /// SCEV constant value is an identity function. 6506if (
C->getAPInt().getSignificantBits() > 64)
6508 int64_t
I =
C->getAPInt().getSExtValue();
6510case llvm::dwarf::DW_OP_plus:
6511case llvm::dwarf::DW_OP_minus:
6513case llvm::dwarf::DW_OP_mul:
6514case llvm::dwarf::DW_OP_div:
6521 /// Convert a SCEV of a value to a DIExpression that is pushed onto the 6522 /// builder's expression stack. The stack should already contain an 6523 /// expression for the iteration count, so that it can be multiplied by 6524 /// the stride and added to the start. 6525 /// Components of the expression are omitted if they are an identity function. 6526 /// Chain (non-affine) SCEVs are not supported. 6529// TODO: Is this check needed? 6530if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6536// Skip pushing arithmetic noops. 6537if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6538if (!pushSCEV(Stride))
6540 pushOperator(llvm::dwarf::DW_OP_mul);
6542if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6543if (!pushSCEV(Start))
6545 pushOperator(llvm::dwarf::DW_OP_plus);
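// Worked example for SCEVToValueExpr above (illustrative only): for a value
// whose SCEV is {%start,+,4} and with the iteration-count expression already
// on the stack, the builder emits roughly
//   DW_OP_consts 4, DW_OP_mul, DW_OP_LLVM_arg <%start>, DW_OP_plus
// i.e. it recovers start + 4 * itercount, with the multiply or add omitted
// when the stride or start is an identity value (1 or 0 respectively).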
6550 /// Create an expression that is an offset from a value (usually the IV). 6551void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6552 pushLocation(OffsetValue);
6555dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: " 6556 << std::to_string(
Offset) <<
"\n");
6559 /// Combine a translation of the SCEV and the IV to create an expression that 6560 /// recovers a location's value. 6561 /// returns true if an expression was created. 6562bool createIterCountExpr(
constSCEV *S,
6563const SCEVDbgValueBuilder &IterationCount,
6565// SCEVs for SSA values are most frequently of the form 6566// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). 6567// This is because %a is a PHI node that is not the IV. However, these 6568// SCEVs have not been observed to result in debuginfo-lossy optimisations, 6569// so it's not expected this point will be reached. 6570if (!isa<SCEVAddRecExpr>(S))
6573LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6576constauto *Rec = cast<SCEVAddRecExpr>(S);
6577if (!Rec->isAffine())
6583// Initialise a new builder with the iteration count expression. In 6584// combination with the value's SCEV this enables recovery. 6585 clone(IterationCount);
6586if (!SCEVToValueExpr(*Rec, SE))
6592 /// Convert a SCEV of a value to a DIExpression that is pushed onto the 6593 /// builder's expression stack. The stack should already contain an 6594 /// expression for the iteration count, so that it can be multiplied by 6595 /// the stride and added to the start. 6596 /// Components of the expression are omitted if they are an identity function. 6600if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6601LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: " 6608// Skip pushing arithmetic noops. 6609if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6610if (!pushSCEV(Start))
6612 pushOperator(llvm::dwarf::DW_OP_minus);
6614if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6615if (!pushSCEV(Stride))
6617 pushOperator(llvm::dwarf::DW_OP_div);
6622// Append the current expression and locations to a location list and an 6623// expression list. Modify the DW_OP_LLVM_arg indexes to account for 6624// the locations already present in the destination list. 6628"Expected the locations vector to contain the IV");
6629// The DWARF_OP_LLVM_arg arguments of the expression being appended must be 6630// modified to account for the locations already in the destination vector. 6631// All builders contain the IV as the first location op. 6633"Expected the location ops to contain the IV.");
6634// DestIndexMap[n] contains the index in DestLocations for the nth 6635// location in this SCEVDbgValueBuilder. 6637for (
constauto &
Op : LocationOps) {
6638auto It =
find(DestLocations,
Op);
6639if (It != DestLocations.
end()) {
6640// Location already exists in DestLocations, reuse existing ArgIndex. 6641 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6644// Location is not in DestLocations, add it. 6649for (
constauto &
Op : expr_ops()) {
6651Op.appendToVector(DestExpr);
6656// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV, 6657// DestIndexMap[n] contains its new index in DestLocations. 6658uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6664/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs 6665/// and DIExpression. 6666structDVIRecoveryRec {
6669 HadLocationArgList(
false) {}
6671 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6675bool HadLocationArgList;
6681for (
auto &RE : RecoveryExprs)
6683 RecoveryExprs.
clear();
6686 ~DVIRecoveryRec() { clear(); }
6690/// Returns the total number of DW_OP_llvm_arg operands in the expression. 6691/// This helps in determining if a DIArglist is necessary or can be omitted from 6694auto expr_ops = ToDwarfOpIter(Expr);
6696for (
autoOp : expr_ops)
6702/// Overwrites DVI with the location and Ops as the DIExpression. This will 6703/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands, 6704/// because a DIArglist is not created for the first argument of the dbg.value. 6705template <
typename T>
6709"contain any DW_OP_llvm_arg operands.");
6711 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6715/// Overwrite DVI with locations placed into a DIArglist. 6716template <
typename T>
6721"Expected expression that references DIArglist locations using " 6722"DW_OP_llvm_arg operands.");
6724for (
Value *V : Locations)
6728 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6731/// Write the new expression and new location ops for the dbg.value. If possible 6732/// reduce the size of the dbg.value intrinsic by omitting DIArglist. This 6733/// can be omitted if: 6734/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg. 6735/// 2. The DW_OP_LLVM_arg is the first operand in the expression. 6739auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6741if (NumLLVMArgs == 0) {
6742// Location assumed to be on the stack. 6745// There is only a single DW_OP_llvm_arg at the start of the expression, 6746// so it can be omitted along with DIArglist. 6748"Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6752// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary. 6756// If the DIExpression was previously empty then add the stack terminator. 6757// Non-empty expressions have only had elements inserted into them and so 6758// the terminator should already be present e.g. stack_value or fragment. 6760if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6763 DbgVal->setExpression(SalvageExpr);
6766if (isa<DbgValueInst *>(DVIRec.DbgRef))
6767 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6769 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
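// Worked example for the update above (illustrative only): a salvaged
// dbg.value with a single location and the expression
//   (DW_OP_LLVM_arg 0, DW_OP_plus_uconst 8, DW_OP_stack_value)
// can drop the DIArgList and keep the location directly, leaving
//   (DW_OP_plus_uconst 8, DW_OP_stack_value),
// whereas an expression that refers to two locations via DW_OP_LLVM_arg 0 and
// DW_OP_LLVM_arg 1 must keep the DIArgList form.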
6772/// Cached location ops may be erased during LSR, in which case a poison is 6773/// required when restoring from the cache. The type of that location is no 6774/// longer available, so just use int8. The poison will be replaced by one or 6775/// more locations later when a SCEVDbgValueBuilder selects alternative 6776/// locations to use for the salvage. 6781/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values. 6783auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6784LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n" 6785 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6786assert(DVIRec.Expr &&
"Expected an expression");
6787 DbgVal->setExpression(DVIRec.Expr);
6789// Even a single location-op may be inside a DIArgList and referenced with 6790// DW_OP_LLVM_arg, which is valid only with a DIArgList. 6791if (!DVIRec.HadLocationArgList) {
6792assert(DVIRec.LocationOps.size() == 1 &&
6793"Unexpected number of location ops.");
6794// LSR's unsuccessful salvage attempt may have added DIArgList, which in 6795// this case was not present before, so force the location back to a 6796// single uncontained Value. 6802for (
WeakVH VH : DVIRec.LocationOps) {
6807 DbgVal->setRawLocation(
6812if (isa<DbgValueInst *>(DVIRec.DbgRef))
6813 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6815 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6820constSCEV *SCEVInductionVar,
6821 SCEVDbgValueBuilder IterCountExpr) {
6823if (isa<DbgValueInst *>(DVIRec.DbgRef)
6824 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6825 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6828// LSR may have caused several changes to the dbg.value in the failed salvage 6829// attempt. So restore the DIExpression, the location ops and also the 6830// location ops format, which is always DIArglist for multiple ops, but only 6831// sometimes for a single op. 6834// LocationOpIndexMap[i] will store the post-LSR location index of 6835// the non-optimised out location at pre-LSR index i. 6837 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6839 NewLocationOps.
push_back(LSRInductionVar);
6841for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6842WeakVH VH = DVIRec.LocationOps[i];
6843// Place the locations not optimised out in the list first, avoiding 6844// inserts later. The map is used to update the DIExpression's 6845// DW_OP_LLVM_arg arguments as the expression is updated. 6846if (VH && !isa<UndefValue>(VH)) {
6848 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6850 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6854// It's possible that a value referred to in the SCEV may have been 6855// optimised out by LSR. 6858LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6859 <<
" refers to a location that is now undef or erased. " 6860"Salvage abandoned.\n");
6864LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6865 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6867 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6868 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6870// Create an offset-based salvage expression if possible, as it requires 6871// less DWARF ops than an iteration count-based expression. 6872if (std::optional<APInt>
Offset =
6874if (
Offset->getSignificantBits() <= 64)
6875 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6878 }
elseif (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6883// Merge the DbgValueBuilder generated expressions and the original 6884// DIExpression, place the result into an new vector. 6886if (DVIRec.Expr->getNumElements() == 0) {
6887assert(DVIRec.RecoveryExprs.size() == 1 &&
6888"Expected only a single recovery expression for an empty " 6890assert(DVIRec.RecoveryExprs[0] &&
6891"Expected a SCEVDbgSalvageBuilder for location 0");
6892 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6893B->appendToVectors(
NewExpr, NewLocationOps);
6895for (
constauto &
Op : DVIRec.Expr->expr_ops()) {
6896// Most Ops needn't be updated. 6903 SCEVDbgValueBuilder *DbgBuilder =
6904 DVIRec.RecoveryExprs[LocationArgIndex].get();
6905// The location doesn't have a SCEVDbgValueBuilder, so LSR did not 6906// optimise it away. So just translate the argument to the updated 6910assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6911"Expected a positive index for the location-op position.");
6912NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6915// The location has a recovery expression. 6916 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6920if (isa<DbgValueInst *>(DVIRec.DbgRef))
6922 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6925 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6929/// Obtain an expression for the iteration count, then attempt to salvage the 6930/// dbg.value intrinsics. 6933SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6934if (DVIToUpdate.empty())
6938assert(SCEVInductionVar &&
6939"Anticipated a SCEV for the post-LSR induction variable");
6942 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6943if (!IVAddRec->isAffine())
6946// Prevent translation using excessive resources. 6950// The iteration count is required to recover location values. 6951 SCEVDbgValueBuilder IterCountExpr;
6952 IterCountExpr.pushLocation(LSRInductionVar);
6953if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6956LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6959for (
auto &DVIRec : DVIToUpdate) {
6960SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6966/// Identify and cache salvageable DVI locations and expressions along with the 6967/// corresponding SCEV(s). Also ensure that the DVI is not deleted between 6968/// cacheing and salvaging. 6971SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6973for (
constauto &
B : L->getBlocks()) {
6975auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6976// Ensure that if any location op is undef that the dbg.value is not 6978if (DbgVal->isKillLocation())
6981// Check that the location op SCEVs are suitable for translation to 6983constauto &HasTranslatableLocationOps =
6984 [&](
constauto *DbgValToTranslate) ->
bool {
6985for (
constauto LocOp : DbgValToTranslate->location_ops()) {
6999if (!HasTranslatableLocationOps(DbgVal))
7002 std::unique_ptr<DVIRecoveryRec> NewRec =
7003 std::make_unique<DVIRecoveryRec>(DbgVal);
7004// Each location Op may need a SCEVDbgValueBuilder in order to recover 7005// it. Pre-allocating a vector will enable quick lookups of the builder 7006// later during the salvage. 7007 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
7008for (
constauto LocOp : DbgVal->location_ops()) {
7009 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7010 NewRec->LocationOps.push_back(LocOp);
7011 NewRec->HadLocationArgList = DbgVal->hasArgList();
7013 SalvageableDVISCEVs.push_back(std::move(NewRec));
7017if (DVR.isDbgValue() || DVR.isDbgAssign())
7018 ProcessDbgValue(&DVR);
7020auto DVI = dyn_cast<DbgValueInst>(&
I);
7023if (ProcessDbgValue(DVI))
7024 DVIHandles.insert(DVI);
7029/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback 7030/// any PHI from the loop header is usable, but may have less chance of 7031/// surviving subsequent transforms. 7033const LSRInstance &LSR) {
7035auto IsSuitableIV = [&](
PHINode *
P) {
7043// For now, just pick the first IV that was generated and inserted by 7044// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away 7045// by subsequent transforms. 7046for (
constWeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7050// There should only be PHI node IVs. 7057for (
PHINode &
P : L.getHeader()->phis()) {
7058if (IsSuitableIV(&
P))
7070// Debug preservation - before we start removing anything identify which DVI 7071// meet the salvageable criteria and store their DIExpression and SCEVs. 7077 std::unique_ptr<MemorySSAUpdater> MSSAU;
7079 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7081// Run the main LSR transformation. 7082const LSRInstance &Reducer =
7083 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7084 Changed |= Reducer.getChanged();
7086// Remove any extra phis created by processing inner loops. 7092#if LLVM_ENABLE_ABI_BREAKING_CHECKS 7095unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7104// LSR may at times remove all uses of an induction variable from a loop. 7105// The only remaining use is the PHI in the exit block. 7106// When this is the case, if the exit value of the IV can be calculated using 7107// SCEV, we can replace the exit block PHI with the final value of the IV and 7108// skip the updates in each loop iteration. 7109if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRecSCEVs
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
// ... (analysis-pass dependency registrations elided)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }