1//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This file contains the implementation of the scalar evolution analysis 10// engine, which is used primarily to analyze expressions involving induction 13// There are several aspects to this library. First is the representation of 14// scalar expressions, which are represented as subclasses of the SCEV class. 15// These classes are used to represent certain types of subexpressions that we 16// can handle. We only create one SCEV of a particular shape, so 17// pointer-comparisons for equality are legal. 19// One important aspect of the SCEV objects is that they are never cyclic, even 20// if there is a cycle in the dataflow for an expression (ie, a PHI node). If 21// the PHI node is one of the idioms that we can represent (e.g., a polynomial 22// recurrence) then we represent it directly as a recurrence node, otherwise we 23// represent it as a SCEVUnknown node. 25// In addition to being able to represent expressions of various types, we also 26// have folders that are used to build the *canonical* representation for a 27// particular expression. These folders are capable of using a variety of 28// rewrite rules to simplify the expressions. 30// Once the folders are defined, we can implement the more interesting 31// higher-level code, such as the code that recognizes PHI nodes of various 32// types, computes the execution count of a loop, etc. 34// TODO: We should use these routines and value representations to implement 35// dependence analysis! 37//===----------------------------------------------------------------------===// 39// There are several good references for the techniques used in this analysis. 41// Chains of recurrences -- a method to expedite the evaluation 42// of closed-form functions 43// Olaf Bachmann, Paul S. Wang, Eugene V. Zima 45// On computational properties of chains of recurrences 48// Symbolic Evaluation of Chains of Recurrences for Loop Optimization 49// Robert A. van Engelen 51// Efficient Symbolic Analysis for Optimizing Compilers 52// Robert A. van Engelen 54// Using the chains of recurrences algebra for data dependence testing and 55// induction variable substitution 56// MS Thesis, Johnie Birch 58//===----------------------------------------------------------------------===// 85#include "llvm/Config/llvm-config.h" 136using namespacePatternMatch;
137using namespaceSCEVPatternMatch;
139#define DEBUG_TYPE "scalar-evolution" 142"Number of loop exits with predictable exit counts");
144"Number of loop exits without predictable exit counts");
146"Number of loops with trip counts computed by force");
148#ifdef EXPENSIVE_CHECKS 156cl::desc(
"Maximum number of iterations SCEV will " 157"symbolically execute a constant " 163cl::desc(
"Verify ScalarEvolution's backedge taken counts (slow)"));
166cl::desc(
"Enable stricter verification with -verify-scev is passed"));
170cl::desc(
"Verify IR correctness when making sensitive SCEV queries (slow)"),
175cl::desc(
"Threshold for inlining multiplication operands into a SCEV"),
180cl::desc(
"Threshold for inlining addition operands into a SCEV"),
184"scalar-evolution-max-scev-compare-depth",
cl::Hidden,
185cl::desc(
"Maximum depth of recursive SCEV complexity comparisons"),
189"scalar-evolution-max-scev-operations-implication-depth",
cl::Hidden,
190cl::desc(
"Maximum depth of recursive SCEV operations implication analysis"),
194"scalar-evolution-max-value-compare-depth",
cl::Hidden,
195cl::desc(
"Maximum depth of recursive value complexity comparisons"),
200cl::desc(
"Maximum depth of recursive arithmetics"),
204"scalar-evolution-max-constant-evolving-depth",
cl::Hidden,
209cl::desc(
"Maximum depth of recursive SExt/ZExt/Trunc"),
214cl::desc(
"Max coefficients in AddRec during evolving"),
219cl::desc(
"Size of the expression which is considered huge"),
224cl::desc(
"Threshold for switching to iteratively computing SCEV ranges"),
228"scalar-evolution-max-loop-guard-collection-depth",
cl::Hidden,
234cl::desc(
"When printing analysis, include information on every instruction"));
237"scalar-evolution-use-expensive-range-sharpening",
cl::Hidden,
239cl::desc(
"Use more powerful methods of sharpening expression ranges. May " 240"be costly in terms of compile time"));
243"scalar-evolution-max-scc-analysis-depth",
cl::Hidden,
244cl::desc(
"Maximum amount of nodes to process while searching SCEVUnknown " 245"Phi strongly connected components"),
250cl::desc(
"Handle <= and >= in finite loops"),
254"scalar-evolution-use-context-for-no-wrap-flag-strenghening",
cl::Hidden,
255cl::desc(
"Infer nuw/nsw flags using context where suitable"),
258//===----------------------------------------------------------------------===// 259// SCEV class definitions 260//===----------------------------------------------------------------------===// 262//===----------------------------------------------------------------------===// 263// Implementation of the SCEV class. 266#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 276 cast<SCEVConstant>(
this)->getValue()->printAsOperand(
OS,
false);
284OS <<
"(ptrtoint " << *
Op->getType() <<
" " << *
Op <<
" to " 291OS <<
"(trunc " << *
Op->getType() <<
" " << *
Op <<
" to " 298OS <<
"(zext " << *
Op->getType() <<
" " << *
Op <<
" to " 305OS <<
"(sext " << *
Op->getType() <<
" " << *
Op <<
" to " 334constchar *OpStr =
nullptr;
353 ListSeparator LS(OpStr);
366// Nothing to print for other nary expressions. 377 cast<SCEVUnknown>(
this)->getValue()->printAsOperand(
OS,
false);
380OS <<
"***COULDNOTCOMPUTE***";
389return cast<SCEVConstant>(
this)->getType();
391return cast<SCEVVScale>(
this)->getType();
396return cast<SCEVCastExpr>(
this)->getType();
398return cast<SCEVAddRecExpr>(
this)->getType();
400return cast<SCEVMulExpr>(
this)->getType();
405return cast<SCEVMinMaxExpr>(
this)->getType();
407return cast<SCEVSequentialMinMaxExpr>(
this)->getType();
409return cast<SCEVAddExpr>(
this)->getType();
411return cast<SCEVUDivExpr>(
this)->getType();
413return cast<SCEVUnknown>(
this)->getType();
430return cast<SCEVCastExpr>(
this)->operands();
439return cast<SCEVNAryExpr>(
this)->operands();
441return cast<SCEVUDivExpr>(
this)->operands();
458// If there is a constant factor, it will be first. 462// Return true if the value is negative, this matches things like (-42 * V). 463return SC->getAPInt().isNegative();
478if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
return S;
480 UniqueSCEVs.InsertNode(S, IP);
499if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
502 UniqueSCEVs.InsertNode(S, IP);
521"Must be a non-bit-width-changing pointer-to-integer cast!");
533"Cannot truncate non-integer value!");
540"Cannot zero extend non-integer value!");
547"Cannot sign extend non-integer value!");
550void SCEVUnknown::deleted() {
551// Clear this SCEVUnknown from various maps. 552 SE->forgetMemoizedResults(
this);
554// Remove this SCEVUnknown from the uniquing map. 555 SE->UniqueSCEVs.RemoveNode(
this);
561void SCEVUnknown::allUsesReplacedWith(
Value *New) {
562// Clear this SCEVUnknown from various maps. 563 SE->forgetMemoizedResults(
this);
565// Remove this SCEVUnknown from the uniquing map. 566 SE->UniqueSCEVs.RemoveNode(
this);
568// Replace the value pointer in case someone is still using this SCEVUnknown. 572//===----------------------------------------------------------------------===// 574//===----------------------------------------------------------------------===// 576/// Compare the two values \p LV and \p RV in terms of their "complexity" where 577/// "complexity" is a partial (and somewhat ad-hoc) relation used to order 578/// operands in SCEV expressions. 584// Order pointer values after integer values. This helps SCEVExpander form 588if (LIsPointer != RIsPointer)
589return (
int)LIsPointer - (int)RIsPointer;
591// Compare getValueID values. 594return (
int)LID - (int)RID;
596// Sort arguments by their position. 597if (
constauto *LA = dyn_cast<Argument>(LV)) {
598constauto *
RA = cast<Argument>(RV);
599unsigned LArgNo = LA->getArgNo(), RArgNo =
RA->getArgNo();
600return (
int)LArgNo - (int)RArgNo;
603if (
constauto *LGV = dyn_cast<GlobalValue>(LV)) {
604constauto *RGV = cast<GlobalValue>(RV);
606constauto IsGVNameSemantic = [&](
constGlobalValue *GV) {
607auto LT = GV->getLinkage();
612// Use the names to distinguish the two values, but only if the 613// names are semantically important. 614if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
615return LGV->getName().compare(RGV->getName());
618// For instructions, compare their loop depth, and their operand count. This 620if (
constauto *LInst = dyn_cast<Instruction>(LV)) {
621constauto *RInst = cast<Instruction>(RV);
623// Compare loop depths. 626if (LParent != RParent) {
630return (
int)LDepth - (int)RDepth;
633// Compare the number of operands. 634unsigned LNumOps = LInst->getNumOperands(),
635 RNumOps = RInst->getNumOperands();
636if (LNumOps != RNumOps)
637return (
int)LNumOps - (int)RNumOps;
639for (
unsignedIdx :
seq(LNumOps)) {
650// Return negative, zero, or positive, if LHS is less than, equal to, or greater 651// than RHS, respectively. A three-way result allows recursive comparisons to be 653// If the max analysis depth was reached, return std::nullopt, assuming we do 654// not know if they are equivalent for sure. 655static std::optional<int>
659// Fast-path: SCEVs are uniqued so we can do a quick equality check. 663// Primarily, sort the SCEVs by their getSCEVType(). 666return (
int)LType - (int)RType;
674// Aside from the getSCEVType() ordering, the particular ordering 675// isn't very important except that it's beneficial to be consistent, 676// so that (a + b) and (b + a) don't end up as different expressions. 693// Compare constant values. 696unsigned LBitWidth = LA.
getBitWidth(), RBitWidth =
RA.getBitWidth();
697if (LBitWidth != RBitWidth)
698return (
int)LBitWidth - (int)RBitWidth;
699return LA.
ult(
RA) ? -1 : 1;
703constauto *LTy = cast<IntegerType>(cast<SCEVVScale>(
LHS)->
getType());
704constauto *RTy = cast<IntegerType>(cast<SCEVVScale>(
RHS)->
getType());
705return LTy->getBitWidth() - RTy->getBitWidth();
712// There is always a dominance between two recs that are used by one SCEV, 713// so we can safely sort recs by loop header dominance. We require such 714// order in getAddExpr. 718assert(LHead != RHead &&
"Two loops share the same header?");
722"No dominance between recurrences used by one SCEV?");
744// Lexicographically compare n-ary-like expressions. 745unsigned LNumOps = LOps.
size(), RNumOps = ROps.
size();
746if (LNumOps != RNumOps)
747return (
int)LNumOps - (int)RNumOps;
749for (
unsigned i = 0; i != LNumOps; ++i) {
765/// Given a list of SCEV objects, order them by their complexity, and group 766/// objects of the same complexity together by value. When this routine is 767/// finished, we know that any duplicates in the vector are consecutive and that 768/// complexity is monotonically increasing. 770/// Note that we go take special precautions to ensure that we get deterministic 771/// results from this routine. In other words, we don't want the results of 772/// this to depend on where the addresses of various SCEV objects happened to 776if (Ops.
size() < 2)
return;
// Noop 780// Whether LHS has provably less complexity than RHS. 783return Complexity && *Complexity < 0;
785if (Ops.
size() == 2) {
786// This is the common case, which also happens to be trivially simple. 789if (IsLessComplex(
RHS,
LHS))
794// Do the rough sort by complexity. 796return IsLessComplex(
LHS,
RHS);
799// Now that we are sorted by complexity, group elements of the same 800// complexity. Note that this is, at worst, N^2, but the vector is likely to 801// be extremely short in practice. Note that we take this approach because we 802// do not want to depend on the addresses of the objects we are grouping. 803for (
unsigned i = 0, e = Ops.
size(); i != e-2; ++i) {
804constSCEV *S = Ops[i];
807// If there are any objects of the same complexity and same value as this 809for (
unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
810if (Ops[j] == S) {
// Found a duplicate. 811// Move it to immediately after i'th element. 813 ++i;
// no need to rescan it. 814if (i == e-2)
return;
// Done! 820/// Returns true if \p Ops contains a huge SCEV (the subtree of S contains at 821/// least HugeExprThreshold nodes). 828/// Performs a number of common optimizations on the passed \p Ops. If the 829/// whole expression reduces down to a single operand, it will be returned. 831/// The following optimizations are performed: 832/// * Fold constants using the \p Fold function. 833/// * Remove identity constants satisfying \p IsIdentity. 834/// * If a constant satisfies \p IsAbsorber, return it. 835/// * Sort operands by complexity. 836template <
typename FoldT,
typename IsIdentityT,
typename IsAbsorberT>
840 IsIdentityT IsIdentity, IsAbsorberT IsAbsorber) {
844if (
constauto *
C = dyn_cast<SCEVConstant>(
Op)) {
848 Folded = cast<SCEVConstant>(
857assert(Folded &&
"Must have folded value");
861if (Folded && IsAbsorber(Folded->
getAPInt()))
865if (Folded && !IsIdentity(Folded->
getAPInt()))
868return Ops.
size() == 1 ? Ops[0] :
nullptr;
871//===----------------------------------------------------------------------===// 872// Simple SCEV method implementations 873//===----------------------------------------------------------------------===// 875/// Compute BC(It, K). The result has width W. Assume, K > 0. 879// Handle the simplest case efficiently. 883// We are using the following formula for BC(It, K): 885// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! 887// Suppose, W is the bitwidth of the return value. We must be prepared for 888// overflow. Hence, we must assure that the result of our computation is 889// equal to the accurate one modulo 2^W. Unfortunately, division isn't 890// safe in modular arithmetic. 892// However, this code doesn't use exactly that formula; the formula it uses 893// is something like the following, where T is the number of factors of 2 in 894// K! (i.e. trailing zeros in the binary representation of K!), and ^ is 897// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) 899// This formula is trivially equivalent to the previous formula. However, 900// this formula can be implemented much more efficiently. The trick is that 901// K! / 2^T is odd, and exact division by an odd number *is* safe in modular 902// arithmetic. To do exact division in modular arithmetic, all we have 903// to do is multiply by the inverse. Therefore, this step can be done at 906// The next issue is how to safely do the division by 2^T. The way this 907// is done is by doing the multiplication step at a width of at least W + T 908// bits. This way, the bottom W+T bits of the product are accurate. Then, 909// when we perform the division by 2^T (which is equivalent to a right shift 910// by T), the bottom W bits are accurate. Extra bits are okay; they'll get 911// truncated out after the division by 2^T. 913// In comparison to just directly using the first formula, this technique 914// is much more efficient; using the first formula requires W * K bits, 915// but this formula less than W + K bits. Also, the first formula requires 916// a division step, whereas this formula only requires multiplies and shifts. 918// It doesn't matter whether the subtraction step is done in the calculation 919// width or the input iteration count's width; if the subtraction overflows, 920// the result must be zero anyway. We prefer here to do it in the width of 921// the induction variable because it helps a lot for certain cases; CodeGen 922// isn't smart enough to ignore the overflow, which leads to much less 923// efficient code if the width of the subtraction is wider than the native 926// (It's possible to not widen at all by pulling out factors of 2 before 927// the multiplication; for example, K=2 can be calculated as 928// It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires 929// extra arithmetic, so it's not an obvious win, and it gets 930// much more complicated for K > 3.) 932// Protection from insane SCEVs; this bound is conservative, 933// but it probably doesn't matter. 939// Calculate K! / 2^T and T; we divide out the factors of two before 940// multiplying for calculating K! / 2^T to avoid overflow. 941// Other overflow doesn't matter because we only care about the bottom 942// W bits of the result. 943APInt OddFactorial(W, 1);
945for (
unsigned i = 3; i <= K; ++i) {
948 OddFactorial *= (i >> TwoFactors);
951// We need at least W + T bits for the multiplication step 952unsigned CalculationBits = W +
T;
954// Calculate 2^T, at width T+W. 957// Calculate the multiplicative inverse of K! / 2^T; 958// this multiplication factor will perform the exact division by 962// Calculate the product, at width T+W 966for (
unsigned i = 1; i != K; ++i) {
975// Truncate the result, and divide by K! / 2^T. 981/// Return the value of this chain of recurrences at the specified iteration 982/// number. We can evaluate this recurrence by multiplying each element in the 983/// chain by the binomial coefficient corresponding to it. In other words, we 984/// can evaluate {A,+,B,+,C,+,D} as: 986/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) 988/// where BC(It, k) stands for binomial coefficient. 999for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
1000// The computation is correct in the face of overflow provided that the 1001// multiplication is performed _after_ the evaluation of the binomial 1004if (isa<SCEVCouldNotCompute>(Coeff))
1012//===----------------------------------------------------------------------===// 1013// SCEV Expression folder implementations 1014//===----------------------------------------------------------------------===// 1019"getLosslessPtrToIntExpr() should self-recurse at most once.");
1021// We could be called with an integer-typed operands during SCEV rewrites. 1022// Since the operand is an integer already, just perform zext/trunc/self cast. 1023if (!
Op->getType()->isPointerTy())
1026// What would be an ID for such a SCEV cast expression? 1033// Is there already an expression for such a cast? 1034if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
1037// It isn't legal for optimizations to construct new ptrtoint expressions 1038// for non-integral pointers. 1044// We can only trivially model ptrtoint if SCEV's effective (integer) type 1045// is sufficiently wide to represent all possible pointer values. 1046// We could theoretically teach SCEV to truncate wider pointers, but 1047// that isn't implemented for now. 1052// If not, is this expression something we can't reduce any further? 1053if (
auto *U = dyn_cast<SCEVUnknown>(
Op)) {
1054// Perform some basic constant folding. If the operand of the ptr2int cast 1055// is a null pointer, don't create a ptr2int SCEV expression (that will be 1056// left as-is), but produce a zero constant. 1057// NOTE: We could handle a more general case, but lack motivational cases. 1058if (isa<ConstantPointerNull>(U->getValue()))
1061// Create an explicit cast node. 1062// We can reuse the existing insert position since if we get here, 1063// we won't have made any changes which would invalidate it. 1064SCEV *S =
new (SCEVAllocator)
1066 UniqueSCEVs.InsertNode(S, IP);
1071assert(
Depth == 0 &&
"getLosslessPtrToIntExpr() should not self-recurse for " 1072"non-SCEVUnknown's.");
1074// Otherwise, we've got some expression that is more complex than just a 1075// single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an 1076// arbitrary expression, we want to have SCEVPtrToIntExpr of an SCEVUnknown 1077// only, and the expressions must otherwise be integer-typed. 1078// So sink the cast down to the SCEVUnknown's. 1080 /// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression, 1081 /// which computes a pointer-typed value, and rewrites the whole expression 1082 /// tree so that *all* the computations are done on integers, and the only 1083 /// pointer-typed operands in the expression are SCEVUnknown. 1084classSCEVPtrToIntSinkingRewriter
1092 SCEVPtrToIntSinkingRewriter
Rewriter(SE);
1098// If the expression is not pointer-typed, just keep it as-is. 1101// Else, recursively sink the cast down into it. 1102return Base::visit(S);
1127"Should only reach pointer-typed SCEVUnknown's.");
1132// And actually perform the cast sinking. 1133constSCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(
Op, *
this);
1135"We must have succeeded in sinking the cast, " 1136"and ending up with an integer-typed expression!");
1144if (isa<SCEVCouldNotCompute>(IntOp))
1153"This is not a truncating conversion!");
1155"This is not a conversion to a SCEVable type!");
1156assert(!
Op->getType()->isPointerTy() &&
"Can't truncate pointer!");
1164if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
return S;
1166// Fold if the operand is constant. 1171// trunc(trunc(x)) --> trunc(x) 1175// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing 1179// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing 1186 UniqueSCEVs.InsertNode(S, IP);
1191// trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and 1192// trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), 1193// if after transforming we have at most one truncate, not counting truncates 1194// that replace other casts. 1195if (isa<SCEVAddExpr>(
Op) || isa<SCEVMulExpr>(
Op)) {
1196auto *CommOp = cast<SCEVCommutativeExpr>(
Op);
1198unsigned numTruncs = 0;
1199for (
unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
1202if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) &&
1203 isa<SCEVTruncateExpr>(S))
1208if (isa<SCEVAddExpr>(
Op))
1210if (isa<SCEVMulExpr>(
Op))
1214// Although we checked in the beginning that ID is not in the cache, it is 1215// possible that during recursion and different modification ID was inserted 1216// into the cache. So if we find it, just return it. 1217if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
1221// If the input value is a chrec scev, truncate the chrec's operands. 1224for (
constSCEV *
Op : AddRec->operands())
1229// Return zero if truncating to known zeros. 1234// The cast wasn't folded; create an explicit cast node. We can reuse 1235// the existing insert position since if we get here, we won't have 1236// made any changes which would invalidate it. 1239 UniqueSCEVs.InsertNode(S, IP);
1244// Get the limit of a recurrence such that incrementing by Step cannot cause 1245// signed overflow as long as the value of the recurrence within the 1246// loop does not exceed this limit before incrementing. 1264// Get the limit of a recurrence such that incrementing by Step cannot cause 1265// unsigned overflow as long as the value of the recurrence within the loop does 1266// not exceed this limit before incrementing. 1279structExtendOpTraitsBase {
1284// Used to make code generic over signed and unsigned overflow. 1285template <
typename ExtendOp>
structExtendOpTraits {
1288// static const SCEV::NoWrapFlags WrapType; 1290// static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; 1292// static const SCEV *getOverflowLimitForStep(const SCEV *Step, 1293// ICmpInst::Predicate *Pred, 1294// ScalarEvolution *SE); 1301staticconst GetExtendExprTy GetExtendExpr;
1303staticconstSCEV *getOverflowLimitForStep(
constSCEV *Step,
1310const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
1317staticconst GetExtendExprTy GetExtendExpr;
1319staticconstSCEV *getOverflowLimitForStep(
constSCEV *Step,
1326const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
1329}
// end anonymous namespace 1331// The recurrence AR has been shown to have no signed/unsigned wrap or something 1332// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as 1333// easily prove NSW/NUW for its preincrement or postincrement sibling. This 1334// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step + 1335// Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the 1336// expression "Step + sext/zext(PreIncAR)" is congruent with 1337// "sext/zext(PostIncAR)" 1338template <
typename ExtendOpTy>
1341auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
1342auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
1348// Check for a simple looking step prior to loop entry. 1349constSCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
1353// Create an AddExpr for "PreStart" after subtracting Step. Full SCEV 1354// subtraction is expensive. For this purpose, perform a quick and dirty 1355// difference, by checking for Step in the operand list. Note, that 1356// SA might have repeated ops, like %a + %a + ..., so only remove one. 1358for (
auto It = DiffOps.
begin(); It != DiffOps.
end(); ++It)
1367// Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + 1370// 1. NSW/NUW flags on the step increment. 1377// "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies 1378// "S+X does not sign/unsign-overflow". 1386// 2. Direct overflow check on the step operation's expression. 1389constSCEV *OperandExtendedStart =
1391 (SE->*GetExtendExpr)(Step, WideTy,
Depth));
1392if ((SE->*GetExtendExpr)(Start, WideTy,
Depth) == OperandExtendedStart) {
1394// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW 1395// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then 1396// `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. 1402// 3. Loop precondition. 1404constSCEV *OverflowLimit =
1405 ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
1414// Get the normalized zero or sign extended expression for this AddRec's Start. 1415template <
typename ExtendOpTy>
1419auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
1421constSCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE,
Depth);
1427 (SE->*GetExtendExpr)(PreStart, Ty,
Depth));
1430// Try to prove away overflow by looking at "nearby" add recurrences. A 1431// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it 1432// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`. 1436// {S,+,X} == {S-T,+,X} + T 1437// => Ext({S,+,X}) == Ext({S-T,+,X} + T) 1439// If ({S-T,+,X} + T) does not overflow ... (1) 1441// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) 1443// If {S-T,+,X} does not overflow ... (2) 1445// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) 1446// == {Ext(S-T)+Ext(T),+,Ext(X)} 1448// If (S-T)+T does not overflow ... (3) 1450// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} 1451// == {Ext(S),+,Ext(X)} == LHS 1453// Thus, if (1), (2) and (3) are true for some T, then 1454// Ext({S,+,X}) == {Ext(S),+,Ext(X)} 1456// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) 1457// does not overflow" restricted to the 0th iteration. Therefore we only need 1458// to check for (1) and (2). 1460// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T 1461// is `Delta` (defined below). 1462template <
typename ExtendOpTy>
1463bool ScalarEvolution::proveNoWrapByVaryingStart(
constSCEV *Start,
1466auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
1468// We restrict `Start` to a constant to prevent SCEV from spending too much 1469// time here. It is correct (but more expensive) to continue with a 1470// non-constant `Start` and do a general SCEV subtraction to compute 1472constSCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
1478for (
unsigned Delta : {-2, -1, 1, 2}) {
1483ID.AddPointer(PreStart);
1490// Give up if we don't already have the add recurrence we need because 1491// actually constructing an add recurrence is relatively expensive. 1492if (PreAR && PreAR->getNoWrapFlags(WrapType)) {
// proves (2) 1495constSCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
1496 DeltaS, &Pred,
this);
1505// Finds an integer D for an expression (C + x + y + ...) such that the top 1506// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or 1507// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is 1508// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and 1509// the (C + x + y + ...) expression is \p WholeAddExpr. 1515// Find number of trailing zeros of (x + y + ...) w/o the C first: 1520// Set D to be as many least significant bits of C as possible while still 1521// guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap: 1527// Finds an integer D for an affine AddRec expression {C,+,x} such that the top 1528// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the 1529// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p 1530// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count. 1532constAPInt &ConstantStart,
1549// Remove FoldCacheUser entry for ID when replacing an existing FoldCache 1551auto &UserIDs = FoldCacheUser[
I.first->second];
1552assert(
count(UserIDs,
ID) == 1 &&
"unexpected duplicates in UserIDs");
1553for (
unsignedI = 0;
I != UserIDs.size(); ++
I)
1554if (UserIDs[
I] ==
ID) {
1561 FoldCacheUser[S].push_back(
ID);
1567"This is not an extending conversion!");
1569"This is not a conversion to a SCEVable type!");
1570assert(!
Op->getType()->isPointerTy() &&
"Can't extend pointer!");
1574auto Iter = FoldCache.find(
ID);
1575if (Iter != FoldCache.end())
1579if (!isa<SCEVZeroExtendExpr>(S))
1587"This is not an extending conversion!");
1589assert(!
Op->getType()->isPointerTy() &&
"Can't extend pointer!");
1591// Fold if the operand is constant. 1595// zext(zext(x)) --> zext(x) 1599// Before doing any expensive analysis, check to see if we've already 1600// computed a SCEV for this Op and Ty. 1606if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
return S;
1610 UniqueSCEVs.InsertNode(S, IP);
1615// zext(trunc(x)) --> zext(x) or x or trunc(x) 1617// It's possible the bits taken off by the truncate were all zero bits. If 1618// so, we should be able to simplify this further. 1619constSCEV *
X = ST->getOperand();
1628// If the input value is a chrec scev, and we can prove that the value 1629// did not overflow the old, smaller, value, we can zero extend all of the 1630// operands (often constants). This allows analysis of something like 1631// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } 1633if (AR->isAffine()) {
1634constSCEV *Start = AR->getStart();
1635constSCEV *Step = AR->getStepRecurrence(*
this);
1637constLoop *L = AR->getLoop();
1639// If we have special knowledge that this addrec won't overflow, 1640// we don't need to do any further analysis. 1641if (AR->hasNoUnsignedWrap()) {
1643 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this,
Depth + 1);
1648// Check whether the backedge-taken count is SCEVCouldNotCompute. 1649// Note that this serves two purposes: It filters out loops that are 1650// simply not analyzable, and it covers the case where this code is 1651// being called from within backedge-taken count analysis, such that 1652// attempting to ask for the backedge-taken count would likely result 1653// in infinite recursion. In the later case, the analysis code will 1654// cope with a conservative value, and it will take care to purge 1655// that value once it has finished. 1657if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
1658// Manually compute the final value for AR, checking for overflow. 1660// Check whether the backedge-taken count can be losslessly casted to 1661// the addrec's type. The count is always unsigned. 1662constSCEV *CastedMaxBECount =
1666if (MaxBECount == RecastedMaxBECount) {
1668// Check whether Start+Step*MaxBECount has no unsigned overflow. 1676constSCEV *WideMaxBECount =
1678constSCEV *OperandExtendedAdd =
1684if (ZAdd == OperandExtendedAdd) {
1685// Cache knowledge of AR NUW, which is propagated to this AddRec. 1687// Return the expression with the addrec on the outside. 1688 Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this,
1693// Similar to above, only this time treat the step value as signed. 1694// This covers loops that count down. 1695 OperandExtendedAdd =
1701if (ZAdd == OperandExtendedAdd) {
1702// Cache knowledge of AR NW, which is propagated to this AddRec. 1703// Negative step causes unsigned wrap, but it still can't self-wrap. 1705// Return the expression with the addrec on the outside. 1706 Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this,
1714// Normally, in the cases we can prove no-overflow via a 1715// backedge guarding condition, we can also compute a backedge 1716// taken count for the loop. The exceptions are assumptions and 1717// guards present in the loop -- SCEV is not great at exploiting 1718// these to compute max backedge taken counts, but can still use 1719// these to prove lack of overflow. Use this fact to avoid 1720// doing extra work that may not pay off. 1721if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
1724auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
1726if (AR->hasNoUnsignedWrap()) {
1727// Same as nuw case above - duplicated here to avoid a compile time 1728// issue. It's not clear that the order of checks does matter, but 1729// it's one of two issue possible causes for a change which was 1730// reverted. Be conservative for the moment. 1732 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this,
Depth + 1);
1737// For a negative step, we can extend the operands iff doing so only 1738// traverses values in the range zext([0,UINT_MAX]). 1744// Cache knowledge of AR NW, which is propagated to this 1745// AddRec. Negative step causes unsigned wrap, but it 1746// still can't self-wrap. 1748// Return the expression with the addrec on the outside. 1749 Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this,
1757// zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw> 1758// if D + (C - D + Step * n) could be proven to not unsigned wrap 1759// where D maximizes the number of trailing zeros of (C - D + Step * n) 1760if (
constauto *SC = dyn_cast<SCEVConstant>(Start)) {
1761constAPInt &
C = SC->getAPInt();
1765constSCEV *SResidual =
1774if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
1777 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this,
Depth + 1);
1783// zext(A % B) --> zext(A) % zext(B) 1792// zext(A / B) --> zext(A) / zext(B). 1793if (
auto *Div = dyn_cast<SCEVUDivExpr>(
Op))
1797if (
auto *SA = dyn_cast<SCEVAddExpr>(
Op)) {
1798// zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw> 1799if (SA->hasNoUnsignedWrap()) {
1800// If the addition does not unsign overflow then we can, by definition, 1801// commute the zero extension with the addition operation. 1803for (
constauto *
Op : SA->operands())
1808// zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...)) 1809// if D + (C - D + x + y + ...) could be proven to not unsigned wrap 1810// where D maximizes the number of trailing zeros of (C - D + x + y + ...) 1812// Often address arithmetics contain expressions like 1813// (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))). 1814// This transformation is useful while proving that such expressions are 1815// equal or differ by a small constant amount, see LoadStoreVectorizer pass. 1816if (
constauto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
1820constSCEV *SResidual =
1830if (
auto *SM = dyn_cast<SCEVMulExpr>(
Op)) {
1831// zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw> 1832if (SM->hasNoUnsignedWrap()) {
1833// If the multiply does not unsign overflow then we can, by definition, 1834// commute the zero extension with the multiply operation. 1836for (
constauto *
Op : SM->operands())
1841// zext(2^K * (trunc X to iN)) to iM -> 1842// 2^K * (zext(trunc X to i{N-K}) to iM)<nuw> 1846// zext(2^K * (trunc X to iN)) to iM 1847// = zext((trunc X to iN) << K) to iM 1848// = zext((trunc X to i{N-K}) << K)<nuw> to iM 1849// (because shl removes the top K bits) 1850// = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM 1851// = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>. 1853if (SM->getNumOperands() == 2)
1854if (
auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
1855if (MulLHS->getAPInt().isPowerOf2())
1856if (
auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
1858 MulLHS->getAPInt().logBase2();
1868// zext(umin(x, y)) -> umin(zext(x), zext(y)) 1869// zext(umax(x, y)) -> umax(zext(x), zext(y)) 1870if (isa<SCEVUMinExpr>(
Op) || isa<SCEVUMaxExpr>(
Op)) {
1871auto *
MinMax = cast<SCEVMinMaxExpr>(
Op);
1873for (
auto *Operand :
MinMax->operands())
1875if (isa<SCEVUMinExpr>(
MinMax))
1880// zext(umin_seq(x, y)) -> umin_seq(zext(x), zext(y)) 1881if (
auto *
MinMax = dyn_cast<SCEVSequentialMinMaxExpr>(
Op)) {
1882assert(isa<SCEVSequentialUMinExpr>(
MinMax) &&
"Not supported!");
1884for (
auto *Operand :
MinMax->operands())
1889// The cast wasn't folded; create an explicit cast node. 1890// Recompute the insert position, as it may have been invalidated. 1891if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
return S;
1894 UniqueSCEVs.InsertNode(S, IP);
1902"This is not an extending conversion!");
1904"This is not a conversion to a SCEVable type!");
1905assert(!
Op->getType()->isPointerTy() &&
"Can't extend pointer!");
1909auto Iter = FoldCache.find(
ID);
1910if (Iter != FoldCache.end())
1914if (!isa<SCEVSignExtendExpr>(S))
1922"This is not an extending conversion!");
1924assert(!
Op->getType()->isPointerTy() &&
"Can't extend pointer!");
1927// Fold if the operand is constant. 1931// sext(sext(x)) --> sext(x) 1935// sext(zext(x)) --> zext(x) 1939// Before doing any expensive analysis, check to see if we've already 1940// computed a SCEV for this Op and Ty. 1946if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
return S;
1947// Limit recursion depth. 1951 UniqueSCEVs.InsertNode(S, IP);
1956// sext(trunc(x)) --> sext(x) or x or trunc(x) 1958// It's possible the bits taken off by the truncate were all sign bits. If 1959// so, we should be able to simplify this further. 1960constSCEV *
X = ST->getOperand();
1969if (
auto *SA = dyn_cast<SCEVAddExpr>(
Op)) {
1970// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> 1971if (SA->hasNoSignedWrap()) {
1972// If the addition does not sign overflow then we can, by definition, 1973// commute the sign extension with the addition operation. 1975for (
constauto *
Op : SA->operands())
1980// sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...)) 1981// if D + (C - D + x + y + ...) could be proven to not signed wrap 1982// where D maximizes the number of trailing zeros of (C - D + x + y + ...) 1984// For instance, this will bring two seemingly different expressions: 1985// 1 + sext(5 + 20 * %x + 24 * %y) and 1986// sext(6 + 20 * %x + 24 * %y) 1988// 2 + sext(4 + 20 * %x + 24 * %y) 1989if (
constauto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
1993constSCEV *SResidual =
2002// If the input value is a chrec scev, and we can prove that the value 2003// did not overflow the old, smaller, value, we can sign extend all of the 2004// operands (often constants). This allows analysis of something like 2005// this: for (signed char X = 0; X < 100; ++X) { int Y = X; } 2007if (AR->isAffine()) {
2008constSCEV *Start = AR->getStart();
2009constSCEV *Step = AR->getStepRecurrence(*
this);
2011constLoop *L = AR->getLoop();
2013// If we have special knowledge that this addrec won't overflow, 2014// we don't need to do any further analysis. 2015if (AR->hasNoSignedWrap()) {
2017 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty,
this,
Depth + 1);
2022// Check whether the backedge-taken count is SCEVCouldNotCompute. 2023// Note that this serves two purposes: It filters out loops that are 2024// simply not analyzable, and it covers the case where this code is 2025// being called from within backedge-taken count analysis, such that 2026// attempting to ask for the backedge-taken count would likely result 2027// in infinite recursion. In the later case, the analysis code will 2028// cope with a conservative value, and it will take care to purge 2029// that value once it has finished. 2031if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
2032// Manually compute the final value for AR, checking for 2035// Check whether the backedge-taken count can be losslessly casted to 2036// the addrec's type. The count is always unsigned. 2037constSCEV *CastedMaxBECount =
2041if (MaxBECount == RecastedMaxBECount) {
2043// Check whether Start+Step*MaxBECount has no signed overflow. 2051constSCEV *WideMaxBECount =
2053constSCEV *OperandExtendedAdd =
2059if (SAdd == OperandExtendedAdd) {
2060// Cache knowledge of AR NSW, which is propagated to this AddRec. 2062// Return the expression with the addrec on the outside. 2063 Start = getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty,
this,
2068// Similar to above, only this time treat the step value as unsigned. 2069// This covers loops that count up with an unsigned step. 2070 OperandExtendedAdd =
2076if (SAdd == OperandExtendedAdd) {
2077// If AR wraps around then 2079// abs(Step) * MaxBECount > unsigned-max(AR->getType()) 2080// => SAdd != OperandExtendedAdd 2082// Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> 2083// (SAdd == OperandExtendedAdd => AR is NW) 2087// Return the expression with the addrec on the outside. 2088 Start = getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty,
this,
2096auto NewFlags = proveNoSignedWrapViaInduction(AR);
2098if (AR->hasNoSignedWrap()) {
2099// Same as nsw case above - duplicated here to avoid a compile time 2100// issue. It's not clear that the order of checks does matter, but 2101// it's one of two issue possible causes for a change which was 2102// reverted. Be conservative for the moment. 2104 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty,
this,
Depth + 1);
2109// sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw> 2110// if D + (C - D + Step * n) could be proven to not signed wrap 2111// where D maximizes the number of trailing zeros of (C - D + Step * n) 2112if (
constauto *SC = dyn_cast<SCEVConstant>(Start)) {
2113constAPInt &
C = SC->getAPInt();
2117constSCEV *SResidual =
2126if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
2129 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty,
this,
Depth + 1);
2135// If the input value is provably positive and we could not simplify 2136// away the sext build a zext instead. 2140// sext(smin(x, y)) -> smin(sext(x), sext(y)) 2141// sext(smax(x, y)) -> smax(sext(x), sext(y)) 2142if (isa<SCEVSMinExpr>(
Op) || isa<SCEVSMaxExpr>(
Op)) {
2143auto *
MinMax = cast<SCEVMinMaxExpr>(
Op);
2145for (
auto *Operand :
MinMax->operands())
2147if (isa<SCEVSMinExpr>(
MinMax))
2152// The cast wasn't folded; create an explicit cast node. 2153// Recompute the insert position, as it may have been invalidated. 2154if (
constSCEV *S = UniqueSCEVs.FindNodeOrInsertPos(
ID, IP))
return S;
2157 UniqueSCEVs.InsertNode(S, IP);
2178/// getAnyExtendExpr - Return a SCEV for the given operand extended with 2179/// unspecified bits out to the given type. 2183"This is not an extending conversion!");
2185"This is not a conversion to a SCEVable type!");
2188// Sign-extend negative constants. 2190if (SC->getAPInt().isNegative())
2193// Peel off a truncate cast. 2195constSCEV *NewOp =
T->getOperand();
2201// Next try a zext cast. If the cast is folded, use it. 2203if (!isa<SCEVZeroExtendExpr>(ZExt))
2206// Next try a sext cast. If the cast is folded, use it. 2208if (!isa<SCEVSignExtendExpr>(SExt))
2211// Force the cast to be folded into the operands of an addrec. 2214for (
constSCEV *
Op : AR->operands())
2219// If the expression is obviously signed, use the sext cast value. 2220if (isa<SCEVSMaxExpr>(
Op))
2223// Absent any other information, use the zext cast value. 2227/// Process the given Ops list, which is a list of operands to be added under 2228/// the given scale, update the given map. This is a helper function for 2229/// getAddRecExpr. As an example of what it does, given a sequence of operands 2230/// that would form an add expression like this: 2232/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) 2234/// where A and B are constants, update the map with these values: 2236/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) 2238/// and add 13 + A*B*29 to AccumulatedConstant. 2239/// This will allow getAddRecExpr to produce this: 2241/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) 2243/// This form often exposes folding opportunities that are hidden in 2244/// the original operand list. 2246/// Return true iff it appears that any interesting folding opportunities 2247/// may be exposed. This helps getAddRecExpr short-circuit extra work in 2248/// the common case where no interesting opportunities are present, and 2249/// is also used as a check to avoid infinite recursion. 2253APInt &AccumulatedConstant,
2256bool Interesting =
false;
2258// Iterate over the add operands. They are sorted, with constants first. 2260while (
constSCEVConstant *
C = dyn_cast<SCEVConstant>(Ops[i])) {
2262// Pull a buried constant out to the outside. 2263if (Scale != 1 || AccumulatedConstant != 0 ||
C->getValue()->isZero())
2265 AccumulatedConstant += Scale *
C->getAPInt();
2268// Next comes everything else. We're especially interested in multiplies 2269// here, but they're in the middle, so just visit the rest with one loop. 2270for (; i != Ops.
size(); ++i) {
2272if (
Mul && isa<SCEVConstant>(
Mul->getOperand(0))) {
2274 Scale * cast<SCEVConstant>(
Mul->getOperand(0))->getAPInt();
2275if (
Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(
Mul->getOperand(1))) {
2276// A multiplication of a constant with another add; recurse. 2280Add->operands(), NewScale, SE);
2282// A multiplication of a constant with some other value. Update 2286auto Pair = M.insert({Key, NewScale});
2290 Pair.first->second += NewScale;
2291// The map already had an entry for this value, which may indicate 2292// a folding opportunity. 2297// An ordinary operand. Update the map. 2298 std::pair<DenseMap<const SCEV *, APInt>::iterator,
bool> Pair =
2299 M.insert({Ops[i], Scale});
2303 Pair.first->second += Scale;
2304// The map already had an entry for this value, which may indicate 2305// a folding opportunity. 2322case Instruction::Add:
2325case Instruction::Sub:
2328case Instruction::Mul:
2337// Check ext(LHS op RHS) == ext(LHS) op ext(RHS) 2338auto *NarrowTy = cast<IntegerType>(
LHS->
getType());
2342constSCEV *
A = (this->*Extension)(
2344constSCEV *LHSB = (this->*Extension)(
LHS, WideTy, 0);
2345constSCEV *RHSB = (this->*Extension)(
RHS, WideTy, 0);
2349// Can we use context to prove the fact we need? 2352// TODO: Support mul. 2353if (BinOp == Instruction::Mul)
2355auto *RHSC = dyn_cast<SCEVConstant>(
RHS);
2356// TODO: Lift this limitation. 2359APIntC = RHSC->getAPInt();
2360unsigned NumBits =
C.getBitWidth();
2361bool IsSub = (BinOp == Instruction::Sub);
2362bool IsNegativeConst = (
Signed &&
C.isNegative());
2363// Compute the direction and magnitude by which we need to check overflow. 2364bool OverflowDown = IsSub ^ IsNegativeConst;
2366if (IsNegativeConst) {
2368// TODO: SINT_MIN on inversion gives the same negative value, we don't 2369// want to deal with that. 2376// To avoid overflow down, we need to make sure that MIN + Magnitude <= LHS. 2379APInt Limit = Min + Magnitude;
2382// To avoid overflow up, we need to make sure that LHS <= MAX - Magnitude. 2385APInt Limit = Max - Magnitude;
2390std::optional<SCEV::NoWrapFlags>
2393// It cannot be done any better. 2406if (OBO->
getOpcode() != Instruction::Add &&
2418/* Signed */false,
LHS,
RHS, CtxI)) {
2425/* Signed */true,
LHS,
RHS, CtxI)) {
2435// We're trying to construct a SCEV of type `Type' with `Ops' as operands and 2436// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of 2437// can't-overflow flags for the operation if possible. 2442using namespacestd::placeholders;
2449assert(CanAnalyze &&
"don't call from other places!");
2455// If FlagNSW is true and all the operands are non-negative, infer FlagNUW. 2456auto IsKnownNonNegative = [&](
constSCEV *S) {
2466if (SignOrUnsignWrap != SignOrUnsignMask &&
2468 isa<SCEVConstant>(Ops[0])) {
2473return Instruction::Add;
2475return Instruction::Mul;
2481constAPInt &
C = cast<SCEVConstant>(Ops[0])->getAPInt();
2483// (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow. 2486 Opcode,
C, OBO::NoSignedWrap);
2491// (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow. 2494 Opcode,
C, OBO::NoUnsignedWrap);
2500// <0,+,nonnegative><nw> is also nuw 2501// TODO: Add corresponding nsw case 2504 Ops[0]->isZero() && IsKnownNonNegative(Ops[1]))
2507// both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW 2510if (
auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[0]))
2511if (UDiv->getOperand(1) == Ops[1])
2513if (
auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[1]))
2514if (UDiv->getOperand(1) == Ops[0])
2525/// Get a canonical add expression, or something simpler if possible. 2530"only nuw or nsw allowed");
2532if (Ops.
size() == 1)
return Ops[0];
2535for (
unsigned i = 1, e = Ops.
size(); i != e; ++i)
2537"SCEVAddExpr operand types don't match!");
2540assert(NumPtrs <= 1 &&
"add has at most one pointer operand");
2545 [](
constAPInt &C1,
constAPInt &C2) {
return C1 + C2; },
2546 [](
constAPInt &
C) {
returnC.isZero(); },
// identity 2547 [](
constAPInt &
C) {
returnfalse; });
// absorber 2551unsignedIdx = isa<SCEVConstant>(Ops[0]) ? 1 : 0;
2553// Delay expensive flag strengthening until necessary. 2558// Limit recursion calls depth. 2560return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
2563// Don't strengthen flags if we have no new information. 2565if (
Add->getNoWrapFlags(OrigFlags) != OrigFlags)
2566Add->setNoWrapFlags(ComputeFlags(Ops));
2570// Okay, check to see if the same value occurs in the operand list more than 2571// once. If so, merge them together into an multiply expression. Since we 2572// sorted the list, these values are required to be adjacent. 2573Type *Ty = Ops[0]->getType();
2574bool FoundMatch =
false;
2575for (
unsigned i = 0, e = Ops.
size(); i != e-1; ++i)
2576if (Ops[i] == Ops[i+1]) {
// X + Y + Y --> X + Y*2 2577// Scan ahead to count how many equal operands there are. 2579while (i+Count != e && Ops[i+Count] == Ops[i])
2581// Merge the values into a multiply. 2584if (Ops.
size() == Count)
2588 --i; e -= Count - 1;
2594// Check for truncates. If all the operands are truncated from the same 2595// type, see if factoring out the truncate would permit the result to be 2596// folded. eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(m)*x + trunc(n)*y) 2597// if the contents of the resulting outer trunc fold to something simple. 2598auto FindTruncSrcType = [&]() ->
Type * {
2599// We're ultimately looking to fold an addrec of truncs and muls of only 2600// constants and truncs, so if we find any other types of SCEV 2601// as operands of the addrec then we bail and return nullptr here. 2602// Otherwise, we return the type of the operand of a trunc that we find. 2603if (
auto *
T = dyn_cast<SCEVTruncateExpr>(Ops[
Idx]))
2604returnT->getOperand()->getType();
2605if (
constauto *
Mul = dyn_cast<SCEVMulExpr>(Ops[
Idx])) {
2606constauto *LastOp =
Mul->getOperand(
Mul->getNumOperands() - 1);
2607if (
constauto *
T = dyn_cast<SCEVTruncateExpr>(LastOp))
2608returnT->getOperand()->getType();
2612if (
auto *SrcType = FindTruncSrcType()) {
2615// Check all the operands to see if they can be represented in the 2616// source type of the truncate. 2617for (
constSCEV *
Op : Ops) {
2619if (
T->getOperand()->getType() != SrcType) {
2628for (
unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
2630 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
2631if (
T->getOperand()->getType() != SrcType) {
2636 }
elseif (
constauto *
C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
2651// Evaluate the expression in the larger type. 2653// If it folds to something simple, use it. Otherwise, don't. 2654if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
2659if (Ops.
size() == 2) {
2660// Check if we have an expression of the form ((X + C1) - C2), where C1 and 2661// C2 can be folded in a way that allows retaining wrapping flags of (X + 2663constSCEV *
A = Ops[0];
2664constSCEV *
B = Ops[1];
2665auto *AddExpr = dyn_cast<SCEVAddExpr>(
B);
2666auto *
C = dyn_cast<SCEVConstant>(
A);
2667if (AddExpr &&
C && isa<SCEVConstant>(AddExpr->getOperand(0))) {
2668auto C1 = cast<SCEVConstant>(AddExpr->getOperand(0))->getAPInt();
2669auto C2 =
C->getAPInt();
2672APInt ConstAdd = C1 + C2;
2673auto AddFlags = AddExpr->getNoWrapFlags();
2674// Adding a smaller constant is NUW if the original AddExpr was NUW. 2681// Adding a constant with the same sign and small magnitude is NSW, if the 2682// original AddExpr was NSW. 2698// Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y) 2699if (Ops.
size() == 2) {
2701if (
Mul &&
Mul->getNumOperands() == 2 &&
2702Mul->getOperand(0)->isAllOnesValue()) {
2705if (matchURem(
Mul->getOperand(1),
X,
Y) &&
X == Ops[1]) {
2711// Skip past any other cast SCEVs. 2715// If there are add operands they would be next. 2717bool DeletedAdd =
false;
2718// If the original flags and all inlined SCEVAddExprs are NUW, use the 2719// common NUW flag for expression after inlining. Other flags cannot be 2720// preserved, because they may depend on the original order of operations. 2726// If we have an add, expand the add operands onto the end of the operands 2731 CommonFlags =
maskFlags(CommonFlags,
Add->getNoWrapFlags());
2734// If we deleted at least one add, we added operands to the end of the list, 2735// and they are not necessarily sorted. Recurse to resort and resimplify 2736// any operands we just acquired. 2741// Skip over the add expression until we get to a multiply. 2745// Check to see if there are any folding opportunities present with 2746// operands multiplied by constant values. 2747if (
Idx < Ops.
size() && isa<SCEVMulExpr>(Ops[
Idx])) {
2760// Some interesting folding opportunity is present, so its worthwhile to 2761// re-generate the operands list. Group the operands by constant scale, 2762// to avoid multiplying by the same constant scale multiple times. 2763 std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
2764for (
constSCEV *NewOp : NewOps)
2765 MulOpLists[M.find(NewOp)->second].push_back(NewOp);
2766// Re-generate the operands list. 2768if (AccumulatedConstant != 0)
2770for (
auto &MulOp : MulOpLists) {
2771if (MulOp.first == 1) {
2773 }
elseif (MulOp.first != 0) {
2788// If we are adding something to a multiply expression, make sure the 2789// something is not already an operand of the multiply. If so, merge it into 2791for (;
Idx < Ops.
size() && isa<SCEVMulExpr>(Ops[
Idx]); ++
Idx) {
2793for (
unsigned MulOp = 0, e =
Mul->getNumOperands(); MulOp != e; ++MulOp) {
2794constSCEV *MulOpSCEV =
Mul->getOperand(MulOp);
2795if (isa<SCEVConstant>(MulOpSCEV))
2797for (
unsigned AddOp = 0, e = Ops.
size(); AddOp != e; ++AddOp)
2798if (MulOpSCEV == Ops[AddOp]) {
2799// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) 2800constSCEV *InnerMul =
Mul->getOperand(MulOp == 0);
2801if (
Mul->getNumOperands() != 2) {
2802// If the multiply has more than two operands, we must get the 2805Mul->operands().take_front(MulOp));
2813if (Ops.
size() == 2)
return OuterMul;
2825// Check this multiply against other multiplies being added together. 2826for (
unsigned OtherMulIdx =
Idx+1;
2827 OtherMulIdx < Ops.
size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
2829constSCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
2830// If MulOp occurs in OtherMul, we can fold the two multiplies 2833 OMulOp != e; ++OMulOp)
2834if (OtherMul->
getOperand(OMulOp) == MulOpSCEV) {
2835// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) 2836constSCEV *InnerMul1 =
Mul->getOperand(MulOp == 0);
2837if (
Mul->getNumOperands() != 2) {
2839Mul->operands().take_front(MulOp));
2846 OtherMul->
operands().take_front(OMulOp));
2851constSCEV *InnerMulSum =
2855if (Ops.
size() == 2)
return OuterMul;
  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant. If so, we can fold them into the
  // recurrence.

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this add and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      // Compute nowrap flags for the addition of the loop-invariant ops and
      // the addrec. Temporarily push it as an operand for that purpose. These
      // flags are valid in the scope of the addrec only.

      // NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}

      // It is not in general safe to propagate flags valid on an add within
      // the addrec scope to one outside it. We must prove that the inner
      // scope is guaranteed to execute if the outer one does to be able to
      // safely propagate. We know the program is undefined if poison is
      // produced on the inner scoped addrec. We also know that *for this use*
      // the outer scoped add can't overflow (because of the flags we just
      // computed for the inner scoped add) without the program being undefined.
      // Proving that entry to the outer scope necessitates entry to the inner
      // scope, thus proves the program undefined if the flags would be violated
      // in the outer scope.
      auto *DefI = getDefiningScopeBound(LIOps);
      if (!isGuaranteedToTransferExecutionTo(DefI, ReachI))

      // Build the new addrec. Propagate the NUW and NSW flags if both the
      // outer add and the inner addrec are guaranteed to have no overflow.
      // Always propagate NW.

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1)
        return NewRec;
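      // Worked example (illustrative): with %a loop-variant and %b
      // loop-invariant w.r.t. L, the fold above turns
      //   %a + %b + {%c,+,1}<L>  into  %a + {%b + %c,+,1}<L>.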
      // Otherwise, add the folded AddRec by the non-invariant parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // added together. If so, we can fold them.
    for (unsigned OtherIdx = Idx + 1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx) {
      // We expect the AddRecExpr's to be sorted in reverse dominance order,
      // so that the 1st found AddRecExpr is dominated by all others.
                 cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
             "AddRecExprs are not sorted in reverse dominance order?");
      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
        // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
             ++OtherIdx) {
          const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
          if (OtherAddRec->getLoop() == AddRecLoop) {
            for (unsigned i = 0, e = OtherAddRec->getNumOperands();
                 i != e; ++i) {
              if (i >= AddRecOps.size()) {
                append_range(AddRecOps, OtherAddRec->operands().drop_front(i));
                  AddRecOps[i], OtherAddRec->getOperand(i)};
            Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
        // Step size has changed, so we cannot guarantee no self-wraparound.

    // Otherwise couldn't fold anything into this recurrence. Move onto the
    // next one.

  // Okay, it looks like we really DO need an add expr. Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
      static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
    UniqueSCEVs.InsertNode(S, IP);

    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
    UniqueSCEVs.InsertNode(S, IP);
    LoopUsers[L].push_back(S);

      static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    UniqueSCEVs.InsertNode(S, IP);

  if (j > 1 && k / j != i) Overflow = true;
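  // Note (added for clarity): the check above detects unsigned multiplication
  // overflow by dividing the product back -- if k = i*j wrapped, k/j can no
  // longer recover i. E.g. with 64-bit operands, i = 2^40 and j = 2^40 give
  // k = 0 after wrapping, and k/j = 0 != i, so Overflow is set.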
/// Compute the result of "n choose k", the binomial coefficient. If an
/// intermediate computation overflows, Overflow will be set and the return will
/// be garbage. Overflow is not cleared on absence of overflow.
  // We use the multiplicative formula:
  //   n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
  // At each iteration, we take the n-th term of the numerator and divide by
  // the (k-n)th term of the denominator. This division will always produce an
  // integral result, and helps reduce the chance of overflow in the
  // intermediate computations. However, we can still overflow even when the
  // final result would fit.
  if (n == 0 || n == k)
    return 1;
    r = umul_ov(r, n - (i - 1), Overflow);
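  // Worked example (illustrative): Choose(5, 2, Overflow) computes
  // r = 1*5 = 5, r /= 1 -> 5, then r = 5*4 = 20, r /= 2 -> 10, dividing at
  // every step so intermediate values stay small; Overflow is left untouched
  // because no multiplication wraps.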
/// Determine if any of the operands in this SCEV are a constant or if
/// any of the add or multiply expressions in this SCEV contain a constant.
struct FindConstantInAddMulChain {
  bool FoundConstant = false;

  bool follow(const SCEV *S) {
    FoundConstant |= isa<SCEVConstant>(S);
    return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S);
  }

  bool isDone() {
    return FoundConstant;
  }
};

  FindConstantInAddMulChain F;
  ST.visitAll(StartExpr);
  return F.FoundConstant;
/// Get a canonical multiply expression, or something simpler if possible.
         "only nuw or nsw allowed");
  if (Ops.size() == 1)
    return Ops[0];

  Type *ETy = Ops[0]->getType();
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
           "SCEVMulExpr operand types don't match!");

      [](const APInt &C1, const APInt &C2) { return C1 * C2; },
      [](const APInt &C) { return C.isOne(); },   // identity
      [](const APInt &C) { return C.isZero(); }); // absorber

  // Delay expensive flag strengthening until necessary.

  // Limit recursion depth.
    return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
  // Don't strengthen flags if we have no new information.
  if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
    Mul->setNoWrapFlags(ComputeFlags(Ops));

  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    if (Ops.size() == 2) {
      // C1*(C2+V) -> C1*C2 + C1*V
        // If any of Add's ops are Adds or Muls with a constant, apply this
        // transformation as well.
        // TODO: There are some cases where this transformation is not
        // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
        // this transformation should be narrowed down.

      if (Ops[0]->isAllOnesValue()) {
        // If we have a mul by -1 of an add, try distributing the -1 among the
        // add operands.
          bool AnyFolded = false;
          for (const SCEV *AddOp : Add->operands()) {
            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
          }
        } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
          // Negation preserves a recurrence's no self-wrap property.
          // Let M be the minimum representable signed value. AddRec with nsw
          // multiplied by -1 can have signed overflow if and only if it takes a
          // value of M: M * (-1) would stay M and (M + 1) * (-1) would be the
          // maximum signed value. In all other cases signed overflow is
          // absent.
              AddRec->getNoWrapFlags(FlagsMask));
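          // Numeric illustration (added): in i8, M = -128. An addrec that
          // reaches -128 cannot keep nsw after negation, because
          // (-128) * (-1) wraps back to -128; any other value negates safely.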
  // Skip over the add expression until we get to a multiply.

  // If there are mul operands, inline them all into this expression.
    bool DeletedMul = false;
      // If we have a mul, expand the mul operands onto the end of the
      // operands list.

    // If we deleted at least one mul, we added operands to the end of the
    // list, and they are not necessarily sorted. Recurse to resort and
    // resimplify any operands we just acquired.

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant. If so, we can fold them into the
  // recurrence.

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this mul and add them to the vector
    // if they are loop invariant w.r.t. the recurrence.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      // NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}

      // If both the mul and addrec are nuw, we can preserve nuw.
      // If both the mul and addrec are nsw, we can only preserve nsw if either
      // a) they are also nuw, or
      // b) all multiplications of addrec operands with scale are nsw.

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1)
        return NewRec;
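      // Worked example (illustrative): 3 * {2,+,5}<L> folds to {6,+,15}<L>;
      // a loop-variant factor %a would remain outside: %a * {6,+,15}<L>.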
      // Otherwise, multiply the folded AddRec by the non-invariant parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {

    // Okay, if there weren't any loop invariants to be folded, check to see
    // if there are multiple AddRec's with the same loop induction variable
    // being multiplied together. If so, we can fold them.

    //   {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
    //   = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
    //        choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
    //     ]]],+,...up to x=2n}.
    // Note that the arguments to choose() are always integers with values
    // known at compile time, never SCEV objects.
    //
    // The implementation avoids pointless extra computations when the two
    // addrec's are of different length (mathematically, it's equivalent to
    // an infinite stream of zeros on the right).
    bool OpsModified = false;
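    // Worked example (illustrative) of the formula above:
    //   {1,+,1}<L> * {1,+,1}<L> = {1,+,3,+,2}<L>,
    // i.e. (i+1)^2 at iteration i: the value starts at 1, its first
    // difference is 2i+3 = {3,+,2}, and the second difference is constant 2.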
    for (unsigned OtherIdx = Idx + 1;
         OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx) {
          dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);

      // Limit max number of arguments to avoid creation of unreasonably big
      // SCEVAddRecs with very complex operands.
      bool Overflow = false;
        SmallVector<const SCEV *, 7> SumOps;
        for (int y = x, ye = 2 * x + 1; y != ye && !Overflow; ++y) {
             z < ze && !Overflow; ++z) {
            if (LargerThan64Bits)
              Coeff = umul_ov(Coeff1, Coeff2, Overflow);
            else
              Coeff = Coeff1 * Coeff2;
      if (Ops.size() == 2)
        return NewAddRec;
      Ops[Idx] = NewAddRec;
      Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
      AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);

    // Otherwise couldn't fold anything into this recurrence. Move onto the
    // next one.

  // Okay, it looks like we really DO need a mul expr. Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
/// Represents an unsigned remainder expression based on unsigned division.
         "SCEVURemExpr operand types don't match!");

  // Short-circuit easy cases.
    // If the constant is one, the result is trivial.
    if (RHSC->getValue()->isOne())

    // If the constant is a power of two, fold into a zext(trunc(LHS)).
    if (RHSC->getAPInt().isPowerOf2()) {
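      // Illustrative example (added, not from the original source): for an
      // i64 %x, %x urem 8 keeps only the low three bits, i.e.
      // (zext i3 (trunc i64 %x to i3) to i64).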
  // Fall back to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y).

/// Get a canonical unsigned division expression, or something simpler if
/// possible.
         "SCEVUDivExpr operand can't be pointer!");
         "SCEVUDivExpr operand types don't match!");

  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))

    if (RHSC->getValue()->isOne())
      return LHS; // X udiv 1 --> X
    // If the denominator is zero, the result of the udiv is undefined. Don't
    // try to analyze it, because the resolution chosen here may differ from
    // the resolution chosen in other parts of the compiler.
    if (!RHSC->getValue()->isZero()) {
      // Determine if the division can be folded into the operands of
      // its operands.
      // TODO: Generalize this to non-constants by using known-bits information.
      unsigned LZ = RHSC->getAPInt().countl_zero();
      // For non-power-of-two values, effectively round the value up to the
      // nearest power of two.
      if (!RHSC->getAPInt().isPowerOf2())
              dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
          // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
          const APInt &StepInt = Step->getAPInt();
          const APInt &DivInt = RHSC->getAPInt();
          if (!StepInt.urem(DivInt) &&
            for (const SCEV *Op : AR->operands())
          /// Get a canonical UDivExpr for a recurrence.
          /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
          // We can currently only fold X%N if X is constant.
          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
          if (StartC && !DivInt.urem(StepInt) &&
            const APInt &StartRem = StartInt.urem(StepInt);
            // Reset the ID to include the new LHS, and check if it is
            // already in the cache.
            if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))

      // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
        for (const SCEV *Op : M->operands())
        // Find an operand that's safely divisible.
        for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
          const SCEV *Op = M->getOperand(i);
          if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {

      // (A/B)/C --> A/(B*C) if safe and B*C can be folded.
        if (auto *DivisorConstant =
                dyn_cast<SCEVConstant>(OtherDiv->getRHS())) {
          bool Overflow = false;
              DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow);
      // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
        for (const SCEV *Op : A->operands())
        for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
          if (isa<SCEVUDivExpr>(Op) ||
        if (Operands.size() == A->getNumOperands())

    // Fold if both operands are constant.
      return getConstant(LHSC->getAPInt().udiv(RHSC->getAPInt()));
  // ((-C + (C smax %x)) /u %x) evaluates to zero, for any positive constant C.
  if (const auto *AE = dyn_cast<SCEVAddExpr>(LHS);
      AE && AE->getNumOperands() == 2) {
    if (const auto *VC = dyn_cast<SCEVConstant>(AE->getOperand(0))) {
      const APInt &NegC = VC->getAPInt();
        const auto *MME = dyn_cast<SCEVSMaxExpr>(AE->getOperand(1));
        if (MME && MME->getNumOperands() == 2 &&
            isa<SCEVConstant>(MME->getOperand(0)) &&
            cast<SCEVConstant>(MME->getOperand(0))->getAPInt() == -NegC &&
            MME->getOperand(1) == RHS)

  // The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
  // changes). Make sure we get a new one.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
    return S;
  UniqueSCEVs.InsertNode(S, IP);
/// Get a canonical unsigned division expression, or something simpler if
/// possible. There is no representation for an exact udiv in SCEV IR, but we
/// can attempt to remove factors from the LHS and RHS. We can't do this when
/// it's not exact because the udiv may be clearing bits.

  // TODO: we could try to find factors in all sorts of things, but for now we
  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
  // end of this file for inspiration.
  if (!Mul || !Mul->hasNoUnsignedWrap())

  // If the mulexpr multiplies by a constant, then that constant must be the
  // first element of the mulexpr.
  if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
    if (LHSCst == RHSCst) {

      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
      // that there's a factor provided by one of the other terms. We need to
      // check.
          cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
          cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
      Mul = dyn_cast<SCEVMulExpr>(LHS);

  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
    if (Mul->getOperand(i) == RHS) {
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
  if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
    if (StepChrec->getLoop() == L) {

/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
         "SCEVAddRecExpr operand types don't match!");
  assert(!Op->getType()->isPointerTy() && "Step must be integer");
         "SCEVAddRecExpr operand is not available at loop entry!");

  // It's tempting to want to call getConstantMaxBackedgeTakenCount here and
  // use that information to infer NUW and NSW flags. However, computing a
  // BE count requires calling getAddRecExpr, so we may not yet have a
  // meaningful BE count at this point (and if we don't, we'd be stuck
  // with a SCEVCouldNotCompute as the cached BE count).

  // Canonicalize nested AddRecs by nesting them in order of loop depth.
    const Loop *NestedLoop = NestedAR->getLoop();
    if (L->contains(NestedLoop)

      // AddRecs require their operands be loop-invariant with respect to their
      // loops. Don't perform this transformation if it would break this
      // requirement.
      bool AllInvariant = all_of(
      // Create a recurrence for the outer loop with the same step size.
      //
      // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
      // inner recurrence has the same property.
        AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
        // Ok, both add recurrences are valid after the transformation.
        //
        // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
        // the outer recurrence has the same property.
      // Reset Operands to its original state.

  // Okay, it looks like we really DO need an addrec expr. Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateAddRecExpr(Operands, L, Flags);
  // getSCEV(Base)->getType() has the same address space as Base->getType()
  // because SCEV::getType() preserves the address space.

  // We'd like to propagate flags from the IR to the corresponding SCEV nodes,
  // but to do that, we have to ensure that said flag is valid in the entire
  // defined scope of the SCEV.
  // TODO: non-instructions have global scope. We might be able to prove
  // some global scope cases.
  auto *GEPI = dyn_cast<Instruction>(GEP);
  if (!GEPI || !isSCEVExprNeverPoison(GEPI))

  bool FirstIter = true;
  for (const SCEV *IndexExpr : IndexExprs) {
    // Compute the (potentially symbolic) offset in bytes for this index.
    if (StructType *STy = dyn_cast<StructType>(CurTy)) {
      // For a struct, add the member offset.
      ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
      unsigned FieldNo = Index->getZExtValue();
      Offsets.push_back(FieldOffset);
      // Update CurTy to the type of the field at Index.
      CurTy = STy->getTypeAtIndex(Index);
      // Update CurTy to its element type.
        assert(isa<PointerType>(CurTy) &&
               "The first index of a GEP indexes a pointer");
        CurTy = GEP->getSourceElementType();
      // For an array, add the element offset, explicitly scaled.
      // Getelementptr indices are signed.
      // Multiply the index by the element size to compute the element offset.
      const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
      Offsets.push_back(LocalOffset);

  // Handle degenerate case of GEP without offsets.

  // Add the offsets together, assuming nsw if inbounds.

  // Add the base address and the offset. We cannot use the nsw flag, as the
  // base address is unsigned. However, if we know that the offset is
  // non-negative, we can use nuw.
         "GEP should not change type mid-flight.");
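// Worked example (illustrative, assuming a typical 64-bit data layout): for
//   getelementptr {i32, i64}, ptr %p, i64 %i, i32 1
// the loop above produces Offsets = {%i * 16, 8}, so the result is
// %p + 16*%i + 8: 16 is the struct size and 8 the offset of field 1.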
SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
  ID.AddInteger(SCEVType);
  return UniqueSCEVs.FindNodeOrInsertPos(ID, IP);

  assert(SCEVMinMaxExpr::isMinMaxType(Kind) && "Not a SCEVMinMaxExpr!");
  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
  if (Ops.size() == 1)
    return Ops[0];
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
           "Operand types don't match!");
           "min/max should be consistently pointerish");
    return IsSigned ? C.isMinSignedValue() : C.isMinValue();
    return IsSigned ? C.isMaxSignedValue() : C.isMaxValue();
    return IsSigned ? C.isMaxSignedValue() : C.isMaxValue();
    return IsSigned ? C.isMinSignedValue() : C.isMinValue();
  // Check if we have created the same expression before.
  if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) {

  // Find the first operation of the same kind.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)

  // Check to see if one of the operands is of the same kind. If so, expand its
  // operands onto our operand list, and recurse to simplify.
    bool DeletedAny = false;
    while (Ops[Idx]->getSCEVType() == Kind) {

  // Okay, check to see if the same value occurs in the operand list twice. If
  // so, delete one. Since we sorted the list, these values are required to
  // be adjacent.
  for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
    if (Ops[i] == Ops[i + 1] ||
        isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
      //  X op Y op Y  -->  X op Y
      //  X op Y       -->  X, if we know X, Y are ordered appropriately
    } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
      //  X op Y       -->  Y, if we know X, Y are ordered appropriately

  if (Ops.size() == 1)
    return Ops[0];

  assert(!Ops.empty() && "Reduced smax down to nothing!");
  // Okay, it looks like we really DO need an expr. Check to see if we
  // already have one, otherwise create a new one.
  const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator)
  UniqueSCEVs.InsertNode(S, IP);
class SCEVSequentialMinMaxDeduplicatingVisitor final
    : public SCEVVisitor<SCEVSequentialMinMaxDeduplicatingVisitor,
                         std::optional<const SCEV *>> {
  using RetVal = std::optional<const SCEV *>;

  const SCEVTypes RootKind; // Must be a sequential min/max expression.
  const SCEVTypes NonSequentialRootKind; // Non-sequential variant of RootKind.

  bool canRecurseInto(SCEVTypes Kind) const {
    // We can only recurse into the SCEV expression of the same effective type
    // as the type of our root SCEV expression.
    return RootKind == Kind || NonSequentialRootKind == Kind;
  }

  RetVal visitAnyMinMaxExpr(const SCEV *S) {
    assert((isa<SCEVMinMaxExpr>(S) || isa<SCEVSequentialMinMaxExpr>(S)) &&
           "Only for min/max expressions.");
    if (!canRecurseInto(Kind))
    auto *NAry = cast<SCEVNAryExpr>(S);
    bool Changed = visit(Kind, NAry->operands(), NewOps);
    return isa<SCEVSequentialMinMaxExpr>(S)
  }

    // Has the whole operand been seen already?
    if (!SeenOps.insert(S).second)
    return Base::visit(S);

      : SE(SE), RootKind(RootKind),
        NonSequentialRootKind(

    for (const SCEV *Op : OrigOps) {
    NewOps = std::move(Ops);

  RetVal visitVScale(const SCEVVScale *VScale) { return VScale; }
  RetVal visitAddExpr(const SCEVAddExpr *Expr) { return Expr; }
  RetVal visitMulExpr(const SCEVMulExpr *Expr) { return Expr; }
  RetVal visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; }
  RetVal visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
    return visitAnyMinMaxExpr(Expr);
    return visitAnyMinMaxExpr(Expr);
    return visitAnyMinMaxExpr(Expr);
    return visitAnyMinMaxExpr(Expr);
    return visitAnyMinMaxExpr(Expr);
  RetVal visitUnknown(const SCEVUnknown *Expr) { return Expr; }
    // If any operand is poison, the whole expression is poison.
    // FIXME: if the *first* operand is poison, the whole expression is poison.
    // Pessimistically, say that it does not propagate poison.
    return false;

// The only way poison may be introduced in a SCEV expression is from a
// poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown,
// not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not*
// introduce poison -- they encode guaranteed, non-speculated knowledge.
//
// Additionally, all SCEV nodes propagate poison from inputs to outputs,
// with the notable exception of umin_seq, where only poison from the first
// operand is (unconditionally) propagated.
struct SCEVPoisonCollector {
  bool LookThroughMaybePoisonBlocking;

  SCEVPoisonCollector(bool LookThroughMaybePoisonBlocking)
      : LookThroughMaybePoisonBlocking(LookThroughMaybePoisonBlocking) {}

  bool follow(const SCEV *S) {
    if (!LookThroughMaybePoisonBlocking &&
    if (auto *SU = dyn_cast<SCEVUnknown>(S)) {

  bool isDone() const { return false; }
/// Return true if V is poison given that AssumedPoison is already poison.
  // First collect all SCEVs that might result in AssumedPoison being poison.
  // We need to look through potentially poison-blocking operations here,
  // because we want to find all SCEVs that *might* result in poison, not only
  // those that are *required* to.
  SCEVPoisonCollector PC1(/* LookThroughMaybePoisonBlocking */ true);

  // AssumedPoison is never poison. As the assumption is false, the implication
  // is true. Don't bother walking the other SCEV in this case.
  if (PC1.MaybePoison.empty())

  // Collect all SCEVs in S that, if poison, *will* result in S being poison
  // as well. We cannot look through potentially poison-blocking operations
  // here, as their arguments only *may* make the result poison.
  SCEVPoisonCollector PC2(/* LookThroughMaybePoisonBlocking */ false);

  // Make sure that no matter which SCEV in PC1.MaybePoison is actually poison,
  // it will also make S poison by being part of PC2.MaybePoison.

  SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ false);

  // If the instruction cannot be poison, it's always safe to reuse.

  // Otherwise, it is possible that I is more poisonous than S. Collect the
  // poison-contributors of S, and then check whether I has any additional
  // poison-contributors. Poison that is contributed through poison-generating
  // flags is handled by dropping those flags instead.

  while (!Worklist.empty()) {
    if (!Visited.insert(V).second)

    // Avoid walking large instruction graphs.
    if (Visited.size() > 16)

    // Either the value can't be poison, or the S would also be poison if it
    // is.
    if (PoisonVals.contains(V) || ::isGuaranteedNotToBePoison(V))

    auto *I = dyn_cast<Instruction>(V);

    // Disjoint or instructions are interpreted as adds by SCEV. However, we
    // can't replace an arbitrary add with disjoint or, even if we drop the
    // flag. We would need to convert the or into an add.
    if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
      if (PDI->isDisjoint())

    // FIXME: Ignore vscale, even though it technically could be poison. Do this
    // because SCEV currently assumes it can't be poison. Remove this special
    // case once we properly model when vscale can be poison.
    if (auto *II = dyn_cast<IntrinsicInst>(I);
        II && II->getIntrinsicID() == Intrinsic::vscale)

    // If the instruction can't create poison, we can recurse to its operands.
    if (I->hasPoisonGeneratingAnnotations())
  assert(SCEVSequentialMinMaxExpr::isSequentialMinMaxType(Kind) &&
         "Not a SCEVSequentialMinMaxExpr!");
  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
           "Operand types don't match!");
           "min/max should be consistently pointerish");

  // Note that SCEVSequentialMinMaxExpr is *NOT* commutative,
  // so we can *NOT* do any kind of sorting of the expressions!

  // Check if we have created the same expression before.
  if (const SCEV *S = findExistingSCEVInCache(Kind, Ops))

  // FIXME: there are *some* simplifications that we can do here.

  // Keep only the first instance of an operand.
    SCEVSequentialMinMaxDeduplicatingVisitor Deduplicator(*this, Kind);
    bool Changed = Deduplicator.visit(Kind, Ops, Ops);

  // Check to see if one of the operands is of the same kind. If so, expand its
  // operands onto our operand list, and recurse to simplify.
    bool DeletedAny = false;
      if (Ops[Idx]->getSCEVType() != Kind) {
      const auto *SMME = cast<SCEVSequentialMinMaxExpr>(Ops[Idx]);
                    SMME->operands().end());

  const SCEV *SaturationPoint;
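  // Clarifying example (added): for umin_seq the saturation point is 0.
  // If Ops[0] is known to be 0, the whole expression is 0 regardless of the
  // later operands -- even poison ones -- which is exactly what makes
  // umin_seq different from plain umin.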
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
    if (!isGuaranteedNotToCauseUB(Ops[i]))
    // We can replace %x umin_seq %y with %x umin %y if either:
    //  * %y being poison implies %x is also poison.
    //  * %x cannot be the saturating value (e.g. zero for umin).
    // Fold %x umin_seq %y to %x if %x ule %y.
    // TODO: We might be able to prove the predicate for a later operand.
    if (isKnownViaNonRecursiveReasoning(Pred, Ops[i - 1], Ops[i])) {

  // Okay, it looks like we really DO need an expr. Check to see if we
  // already have one, otherwise create a new one.
  const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator)
  UniqueSCEVs.InsertNode(S, IP);
  if (Size.isScalable())

  // We can bypass creating a target-independent constant expression and then
  // folding it back into a ConstantInt. This is just a compile-time
  // optimization.
         "Cannot get offset for structure containing scalable vector types");

  // Don't attempt to do anything other than create a SCEVUnknown object
  // here. createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
    assert(cast<SCEVUnknown>(S)->getValue() == V &&
           "Stale SCEVUnknown in uniquing map!");
  FirstUnknown = cast<SCEVUnknown>(S);
  UniqueSCEVs.InsertNode(S, IP);
//===----------------------------------------------------------------------===//
//            Basic SCEV Analysis and PHI Idiom Recognition Code
//

/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
  // Integers and pointers are always SCEVable.

/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.

/// Return a type with the same bitwidth as the given type and which represents
/// how SCEV will treat the given type, for which isSCEVable must return
/// true. For pointer types, this is the pointer index sized integer type.
  // The only other supported type is pointer.

  /// For a valid use point to exist, the defining scope of one operand
  /// must dominate the other.
  bool PreciseA, PreciseB;
  auto *ScopeA = getDefiningScopeBound({A}, PreciseA);
  auto *ScopeB = getDefiningScopeBound({B}, PreciseB);
  if (!PreciseA || !PreciseB)
  return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) ||

  return CouldNotCompute.get();
bool ScalarEvolution::checkValidity(const SCEV *S) const {
    auto *SU = dyn_cast<SCEVUnknown>(S);
    return SU && SU->getValue() == nullptr;
  return !ContainsNulls;

  if (I != HasRecMap.end())
  HasRecMap.insert({S, FoundAddRec});

/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
  if (SI == ExprValueMap.end())
  return SI->second.getArrayRef();
/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
/// cannot be used separately. eraseValueFromMap should be used to remove
/// V from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
  if (I != ValueExprMap.end()) {
    auto EVIt = ExprValueMap.find(I->second);
    bool Removed = EVIt->second.remove(V);
    assert(Removed && "Value not in ExprValueMap?");

void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
  // A recursive query may have already computed the SCEV. It should be
  // equivalent, but may not necessarily be exactly the same, e.g. due to lazily
  // inferred nowrap flags.
  auto It = ValueExprMap.find_as(V);
  if (It == ValueExprMap.end()) {
    ExprValueMap[S].insert(V);
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
  return createSCEVIter(V);

  if (I != ValueExprMap.end()) {
    const SCEV *S = I->second;
    assert(checkValidity(S) &&
           "existing SCEV has not been properly invalidated");

/// Return a SCEV corresponding to -V = -1*V.
  Type *Ty = V->getType();

/// If Expr computes ~A, return A, else return nullptr.
  if (!Add || Add->getNumOperands() != 2 ||
      !Add->getOperand(0)->isAllOnesValue())
  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));

/// Return a SCEV corresponding to ~V = -1-V.
  assert(!V->getType()->isPointerTy() && "Can't negate pointer");

  // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y).
      return (const SCEV *)nullptr;
  if (const SCEV *Replaced = MatchMinMaxNegation(MME))
  Type *Ty = V->getType();
  assert(P->getType()->isPointerTy());

  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) {
    // The base of an AddRec is the first operand.
    // Don't try to transfer nowrap flags for now. We could in some cases
    // (for example, if the pointer operand of the AddRec is a SCEVUnknown).

  if (auto *Add = dyn_cast<SCEVAddExpr>(P)) {
    // The base of an Add is the pointer operand.
    const SCEV **PtrOp = nullptr;
    for (const SCEV *&AddOp : Ops) {
      if (AddOp->getType()->isPointerTy()) {
        assert(!PtrOp && "Cannot have multiple pointer ops");
    // Don't try to transfer nowrap flags for now. We could in some cases
    // (for example, if the pointer operand of the Add is a SCEVUnknown).

  // Any other expression must be a pointer base.

  // Fast path: X - X --> 0.

  // If we subtract two pointers with different pointer bases, bail.
  // Eventually, we're going to add an assertion to getMulExpr that we
  // can't multiply by a pointer.

  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
  // makes it so that we cannot make much use of NUW.

  // Let M be the minimum representable signed value. Then (-1)*RHS
  // signed-wraps if and only if RHS is M. That can happen even for
  // a NSW subtraction because e.g. (-1)*M signed-wraps even though
  // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
  // (-1)*RHS, we need to prove that RHS != M.
  //
  // If LHS is non-negative and we know that LHS - RHS does not
  // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
  // either by proving that RHS > M or that LHS >= 0.
  const bool RHSIsNotMinSigned =

  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
  // RHS is NSW and LHS >= 0.
  //
  // The difficulty here is that the NSW flag may have been proven
  // relative to a loop that is to be found in a recurrence in LHS and
  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
  // larger scope than intended.

  Type *SrcTy = V->getType();
4709"Cannot truncate or zero extend with non-integer arguments!");
4711return V;
// No conversion 4719Type *SrcTy = V->getType();
4721"Cannot truncate or zero extend with non-integer arguments!");
4723return V;
// No conversion 4731Type *SrcTy = V->getType();
4733"Cannot noop or zero extend with non-integer arguments!");
4735"getNoopOrZeroExtend cannot truncate!");
4737return V;
// No conversion 4743Type *SrcTy = V->getType();
4745"Cannot noop or sign extend with non-integer arguments!");
4747"getNoopOrSignExtend cannot truncate!");
4749return V;
// No conversion 4755Type *SrcTy = V->getType();
4757"Cannot noop or any extend with non-integer arguments!");
4759"getNoopOrAnyExtend cannot truncate!");
4761return V;
// No conversion 4767Type *SrcTy = V->getType();
4769"Cannot truncate or noop with non-integer arguments!");
4771"getTruncateOrNoop cannot extend!");
4773return V;
// No conversion 4800assert(!Ops.
empty() &&
"At least one operand must be!");
  // Find the max type first.
  Type *MaxType = nullptr;
  for (const auto *S : Ops)
  assert(MaxType && "Failed to find maximum type!");

  // Extend all ops to max type.
  for (const auto *S : Ops)
  // A pointer operand may evaluate to a nonpointer expression, such as null.
  if (!V->getType()->isPointerTy())

  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    V = AddRec->getStart();
  } else if (auto *Add = dyn_cast<SCEVAddExpr>(V)) {
    const SCEV *PtrOp = nullptr;
    for (const SCEV *AddOp : Add->operands()) {
      if (AddOp->getType()->isPointerTy()) {
        assert(!PtrOp && "Cannot have multiple pointer ops");
    assert(PtrOp && "Must have pointer op");
  }
  else // Not something we can look further into.

/// Push users of the given Instruction onto the given Worklist.
  // Push the def-use children onto the Worklist stack.
  for (User *U : I->users()) {
    auto *UserInsn = cast<Instruction>(U);
    if (Visited.insert(UserInsn).second)

/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start
/// expression in case its Loop is L. If it is not L, then if IgnoreOtherLoops
/// is true, use the AddRec itself; otherwise the rewrite cannot be done.
/// If the SCEV contains a non-invariant unknown SCEV, the rewrite cannot be
/// done.
                            bool IgnoreOtherLoops = true) {
    if (Rewriter.hasSeenLoopVariantSCEVUnknown())
    return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops

      SeenLoopVariantSCEVUnknown = true;

    // Only re-write AddRecExprs for this loop.
      SeenOtherLoops = true;

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
  bool hasSeenOtherLoops() { return SeenOtherLoops; }

  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its
/// post-increment expression in case its Loop is L. If it is not L, use the
/// AddRec itself.
/// If the SCEV contains a non-invariant unknown SCEV, the rewrite cannot be
/// done.
    SCEVPostIncRewriter Rewriter(L, SE);
    return Rewriter.hasSeenLoopVariantSCEVUnknown()

      SeenLoopVariantSCEVUnknown = true;

    // Only re-write AddRecExprs for this loop.
      SeenOtherLoops = true;

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
  bool hasSeenOtherLoops() { return SeenOtherLoops; }

  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
/// This class evaluates the compare condition by matching it against the
/// condition of the loop latch. If there is a match, we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
    bool IsPosBECond = false;
    Value *BECond = nullptr;
    BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
           "Both outgoing branches should not target same header!");
    SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);

    switch (I->getOpcode()) {
    case Instruction::Select: {
      std::optional<const SCEV *> Res =
          compareWithBackedgeCondition(SI->getCondition());
      bool IsOne = cast<SCEVConstant>(*Res)->getValue()->isOne();
      std::optional<const SCEV *> Res = compareWithBackedgeCondition(I);

  explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
        IsPositiveBECond(IsPosBECond) {}

  std::optional<const SCEV *> compareWithBackedgeCondition(Value *IC);

  /// Loop back condition.
  Value *BackedgeCond = nullptr;
  /// Set to true if loop back is on positive branch condition.
  bool IsPositiveBECond;

std::optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
  // If the value matches the backedge condition for the loop latch,
  // then return a constant evolution node based on the loopback
  // branch taken.
  if (BackedgeCond == IC)
    // Only allow AddRecExprs for this loop.
} // end anonymous namespace

ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
  if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) {
    const APInt &BECountAP = BECountMax->getAPInt();
    unsigned NoOverflowBitWidth =
        Instruction::Add, IncRange, OBO::NoSignedWrap);
    if (NSWRegion.contains(AddRecRange))
        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
    if (NUWRegion.contains(AddRecRange))
ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
  // This function can be expensive, only try to prove NSW once per AddRec.
  if (!SignedWrapViaInductionTried.insert(AR).second)

  // Check whether the backedge-taken count is SCEVCouldNotCompute.
  // Note that this serves two purposes: It filters out loops that are
  // simply not analyzable, and it covers the case where this code is
  // being called from within backedge-taken count analysis, such that
  // attempting to ask for the backedge-taken count would likely result
  // in infinite recursion. In the latter case, the analysis code will
  // cope with a conservative value, and it will take care to purge
  // that value once it has finished.

  // Normally, in the cases we can prove no-overflow via a
  // backedge guarding condition, we can also compute a backedge
  // taken count for the loop. The exceptions are assumptions and
  // guards present in the loop -- SCEV is not great at exploiting
  // these to compute max backedge taken counts, but can still use
  // these to prove lack of overflow. Use this fact to avoid
  // doing extra work that may not pay off.
  if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&

  // If the backedge is guarded by a comparison with the pre-inc value the
  // addrec is safe. Also, if the entry is guarded by a comparison with the
  // start value and the backedge is guarded by a comparison with the post-inc
  // value, the addrec is safe.
  const SCEV *OverflowLimit =

ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
  // This function can be expensive, only try to prove NUW once per AddRec.
  if (!UnsignedWrapViaInductionTried.insert(AR).second)

  // Check whether the backedge-taken count is SCEVCouldNotCompute.
  // Note that this serves two purposes: It filters out loops that are
  // simply not analyzable, and it covers the case where this code is
  // being called from within backedge-taken count analysis, such that
  // attempting to ask for the backedge-taken count would likely result
  // in infinite recursion. In the latter case, the analysis code will
  // cope with a conservative value, and it will take care to purge
  // that value once it has finished.

  // Normally, in the cases we can prove no-overflow via a
  // backedge guarding condition, we can also compute a backedge
  // taken count for the loop. The exceptions are assumptions and
  // guards present in the loop -- SCEV is not great at exploiting
  // these to compute max backedge taken counts, but can still use
  // these to prove lack of overflow. Use this fact to avoid
  // doing extra work that may not pay off.
  if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
  // If the backedge is guarded by a comparison with the pre-inc value the
  // addrec is safe. Also, if the entry is guarded by a comparison with the
  // start value and the backedge is guarded by a comparison with the post-inc
  // value, the addrec is safe.

/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.

  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
  /// constant expression.
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();

      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}

}
// end anonymous namespace

/// Try to map \p V into a BinaryOp, and return \c std::nullopt on failure.
  auto *Op = dyn_cast<Operator>(V);

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knows tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.
  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::URem:
  case Instruction::And:
  case Instruction::AShr:
  case Instruction::Shl:

  case Instruction::Or: {
    // Convert or disjoint into add nuw nsw.
    if (cast<PossiblyDisjointInst>(Op)->isDisjoint())
      return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1),
                      /*IsNSW=*/true, /*IsNUW=*/true);
  }

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signmask, then this is just an add.
      // Instcombine turns add of signmask into xor as a strength reduction
      // step.
      if (RHSC->getValue().isSignMask())
        return BinaryOp(Instruction::Add, Op->getOperand(0),
                        Op->getOperand(1));
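      // Illustrative example (added): for i32, the signmask is 0x80000000,
      // and %x xor 0x80000000 == %x + 0x80000000, because adding the top bit
      // just flips it -- the carry out of bit 31 is discarded.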
    // Binary `xor` is a bit-wise `add`.
    if (V->getType()->isIntegerTy(1))
      return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));

  case Instruction::LShr:
    // Turn a logical shift right by a constant into an unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
          ConstantInt::get(SA->getContext(),
      return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
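      // Illustrative example (added): %x lshr 3 becomes %x udiv 8 -- a
      // logical shift right by a constant k is an unsigned divide by 2^k,
      // provided k is less than the bit width.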
  case Instruction::ExtractValue: {
    auto *EVI = cast<ExtractValueInst>(Op);
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
    auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
    bool Signed = WO->isSigned();
    // TODO: Should add nuw/nsw flags for mul as well.
      return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());

    // Now that we know that all uses of the arithmetic-result component of
    // CI are guarded by the overflow check, we can go ahead and pretend
    // that the arithmetic is non-overflowing.
    return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),

  // Recognise the intrinsic loop.decrement.reg; as it has exactly the same
  // semantics as a Sub, return a binary sub expression.
  if (auto *II = dyn_cast<IntrinsicInst>(V))
    if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg)
      return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));
/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
///   Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
///   Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
  // The case where Op == SymbolicPHI (that is, with no type conversions on
  // the way) is handled by the regular add recurrence creating logic and
  // would have already been triggered in createAddRecForPHI. Reaching it here
  // means that createAddRecFromPHI had failed for this PHI before (e.g.,
  // because one of the other operands of the SCEVAddExpr updating this PHI is
  // loop variant).
  //
  // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
  // this case predicates that allow us to prove that Op == SymbolicPHI will
  // be added.
  if (Op == SymbolicPHI)

  if (SourceBits != NewBits)

      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
  if (X != SymbolicPHI)

  if (!L || L->getHeader() != PN->getParent())
// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
//   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which correspond to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
//    Say the Rewriter is called for the following SCEV:
//         8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    where:
//         %X = phi i64 (%Start, %BEValue)
//    It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
//    and call this function with %SymbolicPHI = %X.
//
//    The analysis will find that the value coming around the backedge has
//    the following SCEV:
//         BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    Upon concluding that this matches the desired pattern, the function
//    will return the pair {NewAddRec, SmallPredsVec} where:
//         NewAddRec = {%Start,+,%Step}
//         SmallPredsVec = {P1, P2, P3} as follows:
//           P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
//           P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
//           P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
//    The returned pair means that SymbolicPHI can be rewritten into NewAddRec
//    under the predicates {P1,P2,P3}.
//    This predicated rewrite will be cached in PredicatedSCEVRewrites:
//         PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
//    casts: When needed (namely, when we are called in the context of the
//    vectorizer induction analysis), a Set of cast instructions will be
//    populated by this method, and provided back to isInductionPHI. This is
//    needed to allow the vectorizer to properly record them to be ignored by
//    the cost model and to avoid vectorizing them (otherwise these casts,
//    which are redundant under the runtime overflow checks, will be
//    vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
//    inductions where the sext-trunc / zext-trunc operations (partly) occur
//    after the induction update operation (the induction increment):
//
//      (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
//    which correspond to a phi->add->trunc->sext/zext->phi update chain.
//
//      (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
//    which correspond to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
std::optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
  // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
  // return an AddRec expression under some predicate.

  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  assert(L && "Expecting an integer loop header phi");

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
    } else if (BEValueV != V) {
    } else if (!StartValueV) {
    } else if (StartValueV != V) {
      StartValueV = nullptr;

  if (!BEValueV || !StartValueV)
  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, possibly with casts that we can ignore under
  // an appropriate runtime guard, then we found a simple induction variable!
  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);

  // If there is a single occurrence of the symbolic value, possibly
  // casted, replace it with a recurrence.
  unsigned FoundIndex = Add->getNumOperands();
  Type *TruncTy = nullptr;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
      if (FoundIndex == e) {
  if (FoundIndex == Add->getNumOperands())

  // Create an add with everything but the specified operand.
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  // The runtime checks will not be valid if the step amount is
  // varying inside the loop.

  // *** Part2: Create the predicates

  // Analysis was successful: we have a phi-with-cast pattern for which we
  // can return an AddRec expression under the following predicates:
  //
  // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
  //     fits within the truncated type (does not overflow) for i = 0 to n-1.
  // P2: An Equal predicate that guarantees that
  //         Start = (Ext ix (Trunc iy (Start) to ix) to iy)
  // P3: An Equal predicate that guarantees that
  //         Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
  //
  // As we next prove, the above predicates guarantee that:
  //     Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
  //
  // More formally, we want to prove that:
  //     Expr(i+1) = Start + (i+1) * Accum
  //               = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // Given that:
  // 1) Expr(0) = Start
  // 2) Expr(1) = Start + Accum
  //            = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
  // 3) Induction hypothesis (step i):
  //    Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
  //
  // Proof:
  //   Expr(i+1) = Start + (i+1)*Accum
  //             = (Start + i*Accum) + Accum
  //             = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
  //             = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  //               + Accum + Accum
  //             = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  //               + (Ext ix (Trunc iy (Accum) to ix) to iy)
  //               + Accum                                  :: from P3
  //             = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
  //               + Accum            :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
  //             = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
  //             = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // By induction, the same applies to all iterations 1<=i<n.

  // Create a truncated addrec for which we will add a no overflow check (P1).

  // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
  // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
  // will be constant.
  //
  // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
  // add P1.
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
  // Create the Equal Predicates P2,P3:

  // It is possible that the predicates P2 and/or P3 are computable at
  // compile time due to StartVal and/or Accum being constants.
  // If either one is, then we can check that now and escape if either P2
  // or P3 is false.

  // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
  // for each of StartVal and Accum.
  auto getExtendedExpr = [&](const SCEV *Expr,
                             bool CreateSignExtend) -> const SCEV * {
    const SCEV *ExtendedExpr =

  // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy)
  //              = getExtendedExpr(Expr)
  // Determine whether the predicate P: Expr == ExtendedExpr
  // is known to be false at compile time.
  auto PredIsKnownFalse = [&](const SCEV *Expr,
                              const SCEV *ExtendedExpr) -> bool {
    return Expr != ExtendedExpr &&

  const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
  if (PredIsKnownFalse(StartVal, StartExtended)) {

  // The Step is always Signed (because the overflow checks are either
  // NSSW or NUSW).
  const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
  if (PredIsKnownFalse(Accum, AccumExtended)) {

  auto AppendPredicate = [&](const SCEV *Expr,
                             const SCEV *ExtendedExpr) -> void {
    if (Expr != ExtendedExpr &&

  AppendPredicate(StartVal, StartExtended);
  AppendPredicate(Accum, AccumExtended);

  // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
  // which the casts had been folded away. The caller can rewrite SymbolicPHI
  // into NewAR if it will also add the runtime overflow checks specified in
  // the predicates.
  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
      std::make_pair(NewAR, Predicates);
  // Remember the result of the analysis for this SCEV at this location.
  PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
std::optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
  auto *PN = cast<PHINode>(SymbolicPHI->getValue());

  // Check to see if we already analyzed this PHI.
  auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
  if (I != PredicatedSCEVRewrites.end()) {
    std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
        I->second;
    // Analysis was done before and failed to create an AddRec:
    if (Rewrite.first == SymbolicPHI)
    // Analysis was done before and succeeded to create an AddRec under
    // a predicate:
    assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
    assert(!(Rewrite.second).empty() && "Expected to find Predicates");

  std::optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
      Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);

  // Record in the cache that the analysis failed.
  PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
// FIXME: This utility is currently required because the Rewriter currently
// does not rewrite this expression:
// {0, +, (sext ix (trunc iy to ix) to iy)}
// into {0, +, %step},
// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
    const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
  auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
    if (Expr1 != Expr2 &&
        !Preds->implies(SE.getEqualPredicate(Expr1, Expr2)) &&
        !Preds->implies(SE.getEqualPredicate(Expr2, Expr1)))
      return false;
    return true;
  };

  return areExprsEqual(AR1->getStart(), AR2->getStart()) &&
         areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE));
}
/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
/// If it fails, createAddRecFromPHI will use a more general, but slow,
/// technique for finding the AddRec expression.
const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
                                                      Value *BEValueV,
                                                      Value *StartValueV) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  assert(L && L->getHeader() == PN->getParent());
  assert(BEValueV && StartValueV);

  auto BO = MatchBinaryOp(BEValueV, getDataLayout(), AC, DT, PN);
  if (!BO)
    return nullptr;

  if (BO->Opcode != Instruction::Add)
    return nullptr;

  const SCEV *Accum = nullptr;
  if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
    Accum = getSCEV(BO->RHS);
  else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
    Accum = getSCEV(BO->LHS);

  if (!Accum)
    return nullptr;

  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  if (BO->IsNUW)
    Flags = setFlags(Flags, SCEV::FlagNUW);
  if (BO->IsNSW)
    Flags = setFlags(Flags, SCEV::FlagNSW);

  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
  insertValueToMap(PN, PHISCEV);

  if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
    setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR),
                   (SCEV::NoWrapFlags)(AR->getNoWrapFlags() |
                                       proveNoWrapViaConstantRanges(AR)));
  }

  // We can add Flags to the post-inc expression only if we
  // know that it is *undefined behavior* for BEValueV to
  // overflow.
  if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) {
    assert(isLoopInvariant(Accum, L) &&
           "Accum is defined outside L, but is not invariant?");
    if (isAddRecNeverPoison(BEInst, L))
      (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
  }

  return PHISCEV;
}
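// Illustrative sketch (not from the source): the simple case handled above is
// the canonical counted loop, e.g.
//
//   loop:
//     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
//     %iv.next = add nuw nsw i32 %iv, 1
//
// Here Start = 0 and Accum = 1, so the phi becomes {0,+,1}<nuw><nsw><%loop>.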
const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return nullptr;

  assert(ValueExprMap.find_as(static_cast<Value *>(PN)) ==
             ValueExprMap.end() &&
         "PHI node already processed?");

  // First, try to find AddRec expression without creating a fictitious
  // symbolic value (SymbolicName).
  if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
    return S;

  // Handle PHI node value symbolically.
  const SCEV *SymbolicName = getUnknown(PN);
  insertValueToMap(PN, SymbolicName);

  // Using this symbolic name for the PHI, analyze the value coming around
  // the back-edge.
  const SCEV *BEValue = getSCEV(BEValueV);

  // NOTE: If BEValue is loop invariant, we know that the PHI node just
  // has a special value for the first iteration of the loop.

  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, then we found a simple induction variable!
  if (const auto *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
    // If there is a single occurrence of the symbolic value, replace it
    // with a recurrence.
    unsigned FoundIndex = Add->getNumOperands();
    for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
      if (Add->getOperand(i) == SymbolicName)
        if (FoundIndex == e) {
          FoundIndex = i;
          break;
        }

    if (FoundIndex != Add->getNumOperands()) {
      // Create an add with everything but the specified operand.
      SmallVector<const SCEV *, 8> Ops;
      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
        if (i != FoundIndex)
          Ops.push_back(SCEVBackedgeConditionFolder::rewrite(
              Add->getOperand(i), L, *this));
      const SCEV *Accum = getAddExpr(Ops);

      // This is not a valid addrec if the step amount is varying each
      // loop iteration, but is not itself an addrec in this loop.
      if (isLoopInvariant(Accum, L) ||
          (isa<SCEVAddRecExpr>(Accum) &&
           cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
        SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;

        if (auto BO = MatchBinaryOp(BEValueV, getDataLayout(), AC, DT, PN)) {
          if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
            if (BO->IsNUW)
              Flags = setFlags(Flags, SCEV::FlagNUW);
            if (BO->IsNSW)
              Flags = setFlags(Flags, SCEV::FlagNSW);
          }
        } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
          if (GEP->getOperand(0) == PN) {
            // If the increment has any nowrap flags, then we know the address
            // space cannot be wrapped around.
            //
            // If the GEP is nuw or nusw with non-negative offset, we know that
            // no unsigned wrap occurs. We cannot set the nsw flag as only the
            // offset is treated as signed, while the base is unsigned.
            if (GEP->hasNoUnsignedWrap())
              Flags = setFlags(Flags, SCEV::FlagNUW);
          }
          // We cannot transfer nuw and nsw flags from subtraction
          // operations -- sub nuw X, Y is not the same as add nuw X, -Y
          // because -Y could overflow.
        }

        const SCEV *StartVal = getSCEV(StartValueV);
        const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetMemoizedResults(SymbolicName);
        insertValueToMap(PN, PHISCEV);

        if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
          setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR),
                         (SCEV::NoWrapFlags)(AR->getNoWrapFlags() |
                                             proveNoWrapViaConstantRanges(AR)));
        }

        // We can add Flags to the post-inc expression only if we
        // know that it is *undefined behavior* for BEValueV to
        // overflow.
        if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
          if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
            (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

        return PHISCEV;
      }
    }
  } else {
    // Otherwise, this could be a loop like this:
    //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
    // In this case, j = {1,+,1} and BEValue is j.
    // Because the other in-value of i (0) fits the evolution of BEValue
    // i really is an addrec evolution.
    //
    // We can generalize this saying that i is the shifted value of BEValue
    // by one iteration:
    //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
    //
    // Do not allow refinement in rewriting of BEValue.
    if (isGuaranteedNotToCauseUB(BEValue)) {
      const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
      const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
      const SCEV *StartVal = getSCEV(StartValueV);
      if (Start == StartVal) {
        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetMemoizedResults(SymbolicName);
        insertValueToMap(PN, Shifted);
        return Shifted;
      }
    }
  }

  // Remove the temporary PHI node SCEV that has been inserted while intending
  // to create an AddRecExpr for this PHI node. We can not keep this temporary
  // as it will prevent later (possibly simpler) SCEV expressions to be added
  // to the ValueExprMap.
  eraseValueFromMap(PN);

  return nullptr;
}
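// Illustrative sketch (not from the source) of the shifted-value case above:
// for "i = 0; for (j = 1; ..; ++j) { ... i = j; }", BEValue j is {1,+,1}.
// SCEVShiftRewriter shifts it back one iteration to {0,+,1}, SCEVInitRewriter
// evaluates that at iteration 0 to 0, which matches i's start value, so the
// phi for i is mapped to the shifted expression {0,+,1}.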
// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern.  Return true on a successful
// match.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
                          Value *&C, Value *&LHS, Value *&RHS) {
  C = BI->getCondition();

  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));

  Use &LeftUse = Merge->getOperandUse(0);
  Use &RightUse = Merge->getOperandUse(1);

  if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
    LHS = LeftUse;
    RHS = RightUse;
    return true;
  }

  if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
    LHS = RightUse;
    RHS = LeftUse;
    return true;
  }

  return false;
}

const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
  if (PN->getNumIncomingValues() == 2) {
    // Try to match
    //
    //  br %cond, label %left, label %right
    // left:
    //  br label %merge
    // right:
    //  br label %merge
    // merge:
    //  V = phi [ %x, %left ], [ %y, %right ]
    //
    // as "select %cond, %x, %y"

    BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
    assert(IDom && "At least the entry block should dominate PN");

    auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
    Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;

    if (BI && BI->isConditional() &&
        BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
        properlyDominates(getSCEV(LHS), PN->getParent()) &&
        properlyDominates(getSCEV(RHS), PN->getParent()))
      return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
  }

  return nullptr;
}
/// Returns SCEV for the first operand of a phi if all phi operands have
/// identical opcodes and operands.
/// eg.
/// a: %add = %a + %b
///    br %c
/// b: %add1 = %a + %b
///    br %c
/// c: %phi = phi [%add, a], [%add1, b]
/// scev(%phi) => scev(%add)
const SCEV *
ScalarEvolution::createNodeForPHIWithIdenticalOperands(PHINode *PN) {
  BinaryOperator *CommonInst = nullptr;
  // Check if instructions are identical.
  for (Value *Incoming : PN->incoming_values()) {
    auto *IncomingInst = dyn_cast<BinaryOperator>(Incoming);
    if (!IncomingInst ||
        (CommonInst && !IncomingInst->isIdenticalToWhenDefined(CommonInst)))
      return nullptr; // Not identical, give up.
    // Remember binary operator.
    CommonInst = IncomingInst;
  }
  if (!CommonInst)
    return nullptr;

  // Check if SCEV exprs for instructions are identical.
  const SCEV *CommonSCEV = getSCEV(CommonInst);
  bool SCEVExprsIdentical =
      all_of(drop_begin(PN->incoming_values()),
             [this, CommonSCEV](Value *V) { return CommonSCEV == getSCEV(V); });
  return SCEVExprsIdentical ? CommonSCEV : nullptr;
}
PHINode *PN) {
6049if (
constSCEV *S = createAddRecFromPHI(PN))
6052// We do not allow simplifying phi (undef, X) to X here, to avoid reusing the 6056/*UseInstrInfo=*/true,
/*CanUseUndef=*/false}))
6059if (
constSCEV *S = createNodeForPHIWithIdenticalOperands(PN))
6062if (
constSCEV *S = createNodeFromSelectLikePHI(PN))
6065// If it's not a loop phi, we can't handle it yet. 6072constSCEV *OperandToFind;
6073constSCEVTypes RootKind;
// Must be a sequential min/max expression. 6074constSCEVTypes NonSequentialRootKind;
// Non-seq variant of RootKind. 6078bool canRecurseInto(
SCEVTypes Kind)
const{
6079// We can only recurse into the SCEV expression of the same effective type 6080// as the type of our root SCEV expression, and into zero-extensions. 6081return RootKind == Kind || NonSequentialRootKind == Kind ||
6086 : OperandToFind(OperandToFind), RootKind(RootKind),
6087 NonSequentialRootKind(
6091bool follow(
constSCEV *S) {
6092 Found = S == OperandToFind;
6094return !isDone() && canRecurseInto(S->
getSCEVType());
6097bool isDone()
const{
return Found; }
6100 FindClosure FC(OperandToFind, RootKind);
6105std::optional<const SCEV *>
6106ScalarEvolution::createNodeForSelectOrPHIInstWithICmpInstCond(
Type *Ty,
6110// Try to match some simple smax or umax patterns. 6116switch (ICI->getPredicate()) {
6127// a > b ? a+x : b+x -> max(a, b)+x 6128// a > b ? b+x : a+x -> min(a, b)+x 6130boolSigned = ICI->isSigned();
6136// FIXME: Handle cases where LS/RS are pointers not equal to LA/RA. 6137// Need to make sure we can't produce weird expressions involving 6139if (LA == LS &&
RA == RS)
6141if (LA == RS &&
RA == LS)
6144auto CoerceOperand = [&](
constSCEV *
Op) ->
constSCEV * {
6145if (
Op->getType()->isPointerTy()) {
6147if (isa<SCEVCouldNotCompute>(
Op))
6156LS = CoerceOperand(LS);
6157 RS = CoerceOperand(RS);
6158if (isa<SCEVCouldNotCompute>(LS) || isa<SCEVCouldNotCompute>(RS))
6173// x != 0 ? x+y : C+y -> x == 0 ? C+y : x+y 6177// x == 0 ? C+y : x+y -> umax(x, C)+y iff C u<= 1 6179 isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->
isZero()) {
6185if (isa<SCEVConstant>(
C) && cast<SCEVConstant>(
C)->getAPInt().ule(1))
6188// x == 0 ? 0 : umin (..., x, ...) -> umin_seq(x, umin (...)) 6189// x == 0 ? 0 : umin_seq(..., x, ...) -> umin_seq(x, umin_seq(...)) 6190// x == 0 ? 0 : umin (..., umin_seq(..., x, ...), ...) 6191// -> umin_seq(x, umin (..., umin_seq(...), ...)) 6192if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->
isZero() &&
6193 isa<ConstantInt>(TrueVal) && cast<ConstantInt>(TrueVal)->
isZero()) {
6195while (
auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(
X))
6196X = ZExt->getOperand();
6201/*Sequential=*/true);
6212static std::optional<const SCEV *>
6214constSCEV *TrueExpr,
constSCEV *FalseExpr) {
6218"Unexpected operands of a select.");
6220// i1 cond ? i1 x : i1 C --> C + (i1 cond ? (i1 x - i1 C) : i1 0) 6221// --> C + (umin_seq cond, x - C) 6223// i1 cond ? i1 C : i1 x --> C + (i1 cond ? i1 0 : (i1 x - i1 C)) 6224// --> C + (i1 ~cond ? (i1 x - i1 C) : i1 0) 6225// --> C + (umin_seq ~cond, x - C) 6227// FIXME: while we can't legally model the case where both of the hands 6228// are fully variable, we only require that the *difference* is constant. 6229if (!isa<SCEVConstant>(TrueExpr) && !isa<SCEVConstant>(FalseExpr))
6233if (isa<SCEVConstant>(TrueExpr)) {
6242/*Sequential=*/true));
6245static std::optional<const SCEV *>
6248if (!isa<ConstantInt>(TrueVal) && !isa<ConstantInt>(FalseVal))
6252constauto *SETrue = SE->
getSCEV(TrueVal);
6253constauto *SEFalse = SE->
getSCEV(FalseVal);
6257constSCEV *ScalarEvolution::createNodeForSelectOrPHIViaUMinSeq(
6259assert(
Cond->getType()->isIntegerTy(1) &&
"Select condition is not an i1?");
6261V->getType() ==
TrueVal->getType() &&
6262"Types of select hands and of the result must match.");
6264// For now, only deal with i1-typed `select`s. 6265if (!
V->getType()->isIntegerTy(1))
6268if (std::optional<const SCEV *> S =
6278// Handle "constant" branch or select. This can occur for instance when a 6279// loop pass transforms an inner loop and moves on to process the outer loop. 6280if (
auto *CI = dyn_cast<ConstantInt>(
Cond))
6281returngetSCEV(CI->isOne() ? TrueVal : FalseVal);
6283if (
auto *
I = dyn_cast<Instruction>(V)) {
6284if (
auto *ICI = dyn_cast<ICmpInst>(
Cond)) {
6285if (std::optional<const SCEV *> S =
6286 createNodeForSelectOrPHIInstWithICmpInstCond(
I->getType(), ICI,
6292return createNodeForSelectOrPHIViaUMinSeq(V,
Cond, TrueVal, FalseVal);
6295/// Expand GEP instructions into add and multiply operations. This allows them 6296/// to be analyzed by regular SCEV code. 6298assert(
GEP->getSourceElementType()->isSized() &&
6299"GEP source element type must be sized");
6307APInt ScalarEvolution::getConstantMultipleImpl(
constSCEV *S) {
6315// The result is GCD of all operands results. 6317for (
unsignedI = 1, E =
N->getNumOperands();
I < E && Res != 1; ++
I)
6325return cast<SCEVConstant>(S)->getAPInt();
6332// Only multiples that are a power of 2 will hold after truncation. 6335return GetShiftedByZeros(TZ);
6342// Only multiples that are a power of 2 will hold after sext. 6345return GetShiftedByZeros(TZ);
6349if (
M->hasNoUnsignedWrap()) {
6350// The result is the product of all operand results. 6352for (
constSCEV *Operand :
M->operands().drop_front())
6357// If there are no wrap guarentees, find the trailing zeros, which is the 6358// sum of trailing zeros for all its operands. 6360for (
constSCEV *Operand :
M->operands())
6362return GetShiftedByZeros(TZ);
6367if (
N->hasNoUnsignedWrap())
6368return GetGCDMultiple(
N);
6369// Find the trailing bits, which is the minimum of its operands. 6371for (
constSCEV *Operand :
N->operands().drop_front())
6373return GetShiftedByZeros(TZ);
6380return GetGCDMultiple(cast<SCEVNAryExpr>(S));
6382// ask ValueTracking for known bits 6386 .countMinTrailingZeros();
6387return GetShiftedByZeros(Known);
6396autoI = ConstantMultipleCache.find(S);
6397if (
I != ConstantMultipleCache.end())
6400APInt Result = getConstantMultipleImpl(S);
6401auto InsertPair = ConstantMultipleCache.insert({S, Result});
6402assert(InsertPair.second &&
"Should insert a new key");
6403return InsertPair.first->second;
6416/// Helper method to assign a range to V from metadata present in the IR. 6419if (
MDNode *MD =
I->getMetadata(LLVMContext::MD_range))
6421if (
constauto *CB = dyn_cast<CallBase>(V))
6422if (std::optional<ConstantRange>
Range = CB->getRange())
6425if (
auto *
A = dyn_cast<Argument>(V))
6426if (std::optional<ConstantRange>
Range =
A->getRange())
6436 UnsignedRanges.erase(AddRec);
6437 SignedRanges.erase(AddRec);
6438 ConstantMultipleCache.erase(AddRec);
6443getRangeForUnknownRecurrence(
constSCEVUnknown *U) {
6449// Match a simple recurrence of the form: <start, ShiftOp, Step>, and then 6450// use information about the trip count to improve our available range. Note 6451// that the trip count independent cases are already handled by known bits. 6452// WARNING: The definition of recurrence used here is subtly different than 6453// the one used by AddRec (and thus most of this file). Step is allowed to 6454// be arbitrarily loop varying here, where AddRec allows only loop invariant 6455// and other addrecs in the same loop (for non-affine addrecs). The code 6456// below intentionally handles the case where step is not loop invariant. 6457auto *
P = dyn_cast<PHINode>(U->getValue());
6461// Make sure that no Phi input comes from an unreachable block. Otherwise, 6462// even the values that are not available in these blocks may come from them, 6463// and this leads to false-positive recurrence test. 6473// If we found a recurrence in reachable code, we must be in a loop. Note 6474// that BO might be in some subloop of L, and that's completely okay. 6476assert(L && L->getHeader() ==
P->getParent());
6478// NOTE: This bailout should be an assert instead. However, asserting 6479// the condition here exposes a case where LoopFusion is querying SCEV 6480// with malformed loop information during the midst of the transform. 6481// There doesn't appear to be an obvious fix, so for the moment bailout 6482// until the caller issue can be fixed. PR49566 tracks the bug. 6485// TODO: Extend to other opcodes such as mul, and div 6489case Instruction::AShr:
6490case Instruction::LShr:
6491case Instruction::Shl:
6496// TODO: Handle the power function forms some day. 6506 KnownStep.getBitWidth() ==
BitWidth);
6508// Compute total shift amount, being careful of overflow and bitwidths. 6509auto MaxShiftAmt = KnownStep.getMaxValue();
6511bool Overflow =
false;
6512auto TotalShift = MaxShiftAmt.umul_ov(TCAP, Overflow);
6519case Instruction::AShr: {
6520// For each ashr, three cases: 6521// shift = 0 => unchanged value 6522// saturation => 0 or -1 6523// other => a value closer to zero (of the same sign) 6524// Thus, the end value is closer to zero than the start. 6527if (KnownStart.isNonNegative())
6528// Analogous to lshr (simply not yet canonicalized) 6530 KnownStart.getMaxValue() + 1);
6531if (KnownStart.isNegative())
6532// End >=u Start && End <=s Start 6534 KnownEnd.getMaxValue() + 1);
6537case Instruction::LShr: {
6538// For each lshr, three cases: 6539// shift = 0 => unchanged value 6541// other => a smaller positive number 6542// Thus, the low end of the unsigned range is the last value produced. 6546 KnownStart.getMaxValue() + 1);
6548case Instruction::Shl: {
6549// Iff no bits are shifted out, value increases on every shift. 6552if (TotalShift.ult(KnownStart.countMinLeadingZeros()))
6554 KnownEnd.getMaxValue() + 1);
6562ScalarEvolution::getRangeRefIter(
constSCEV *S,
6563 ScalarEvolution::RangeSignHint SignHint) {
6565 SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
6570// Add Expr to the worklist, if Expr is either an N-ary expression or a 6571// SCEVUnknown PHI node. 6572auto AddToWorklist = [&WorkList, &Seen, &Cache](
constSCEV *Expr) {
6573if (!Seen.
insert(Expr).second)
6579if (!isa<PHINode>(cast<SCEVUnknown>(Expr)->getValue()))
6605// Build worklist by queuing operands of N-ary expressions and phi nodes. 6606for (
unsignedI = 0;
I != WorkList.
size(); ++
I) {
6607constSCEV *
P = WorkList[
I];
6608auto *UnknownS = dyn_cast<SCEVUnknown>(
P);
6609// If it is not a `SCEVUnknown`, just recurse into operands. 6611for (
constSCEV *
Op :
P->operands())
6615// `SCEVUnknown`'s require special treatment. 6616if (
constPHINode *
P = dyn_cast<PHINode>(UnknownS->getValue())) {
6617if (!PendingPhiRangesIter.insert(
P).second)
6624if (!WorkList.
empty()) {
6625// Use getRangeRef to compute ranges for items in the worklist in reverse 6626// order. This will force ranges for earlier operands to be computed before 6627// their users in most cases. 6629 getRangeRef(
P, SignHint);
6631if (
auto *UnknownS = dyn_cast<SCEVUnknown>(
P))
6632if (
constPHINode *
P = dyn_cast<PHINode>(UnknownS->getValue()))
6633 PendingPhiRangesIter.erase(
P);
6637return getRangeRef(S, SignHint, 0);
/// Determine the range for a particular SCEV.  If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
const ConstantRange &ScalarEvolution::getRangeRef(
    const SCEV *S, ScalarEvolution::RangeSignHint SignHint, unsigned Depth) {
  DenseMap<const SCEV *, ConstantRange> &Cache =
      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
                                                       : SignedRanges;
  ConstantRange::PreferredRangeType RangeType =
      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? ConstantRange::Unsigned
                                                       : ConstantRange::Signed;

  // See if we've computed this range already.
  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  if (I != Cache.end())
    return I->second;

  // Switch to iteratively computing the range for S, if it is part of a deeply
  // nested expression.
  if (Depth > RangeIterThreshold)
    return getRangeRefIter(S, SignHint);
  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult = ConstantRange::getFull(BitWidth);

  // If the value has known zeros, the maximum value will have those known
  // zeros as well.
  if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
    APInt Multiple = getNonZeroConstantMultiple(S);
    APInt Remainder = APInt::getMaxValue(BitWidth).urem(Multiple);
    if (!Remainder.isZero())
      ConservativeResult =
          ConstantRange(APInt::getMinValue(BitWidth),
                        APInt::getMaxValue(BitWidth) - Remainder + 1);
  }

  switch (S->getSCEVType()) {
  case scTruncate: {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(S);
    ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint, Depth + 1);
    return setRange(
        Trunc, SignHint,
        ConservativeResult.intersectWith(X.truncate(BitWidth), RangeType));
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(S);
    ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint, Depth + 1);
    return setRange(
        ZExt, SignHint,
        ConservativeResult.intersectWith(X.zeroExtend(BitWidth), RangeType));
  }
  case scSignExtend: {
    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(S);
    ConstantRange X = getRangeRef(SExt->getOperand(), SignHint, Depth + 1);
    return setRange(
        SExt, SignHint,
        ConservativeResult.intersectWith(X.signExtend(BitWidth), RangeType));
  }
  case scPtrToInt: {
    const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(S);
    ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint, Depth + 1);
    return setRange(PtrToInt, SignHint, X);
  }
  case scAddExpr: {
    const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
    unsigned WrapType = OBO::AnyWrap;
    if (Add->hasNoSignedWrap())
      WrapType |= OBO::NoSignedWrap;
    if (Add->hasNoUnsignedWrap())
      WrapType |= OBO::NoUnsignedWrap;
    ConstantRange X = getRangeRef(Add->getOperand(0), SignHint, Depth + 1);
    for (const SCEV *Op : drop_begin(Add->operands()))
      X = X.addWithNoWrap(getRangeRef(Op, SignHint, Depth + 1), WrapType,
                          RangeType);
    return setRange(Add, SignHint,
                    ConservativeResult.intersectWith(X, RangeType));
  }
  case scMulExpr: {
    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(S);
    ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint, Depth + 1);
    for (const SCEV *Op : drop_begin(Mul->operands()))
      X = X.multiply(getRangeRef(Op, SignHint, Depth + 1));
    return setRange(Mul, SignHint,
                    ConservativeResult.intersectWith(X, RangeType));
  }
  case scUDivExpr: {
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
    ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint, Depth + 1);
    ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint, Depth + 1);
    return setRange(UDiv, SignHint,
                    ConservativeResult.intersectWith(X.udiv(Y), RangeType));
  }
6750// If there's no unsigned wrap, the value will never be less than its 6754if (!UnsignedMinValue.
isZero())
6755 ConservativeResult = ConservativeResult.intersectWith(
6759// If there's no signed wrap, and all the operands except initial value have 6760// the same sign or zero, the value won't ever be: 6761// 1: smaller than initial value if operands are non negative, 6762// 2: bigger than initial value if operands are non positive. 6763// For both cases, value can not cross signed min/max boundary. 6765bool AllNonNeg =
true;
6766bool AllNonPos =
true;
6774 ConservativeResult = ConservativeResult.intersectWith(
6779 ConservativeResult = ConservativeResult.intersectWith(
6786// TODO: non-affine addrec 6788constSCEV *MaxBEScev =
6790if (!isa<SCEVCouldNotCompute>(MaxBEScev)) {
6791APInt MaxBECount = cast<SCEVConstant>(MaxBEScev)->getAPInt();
6793// Adjust MaxBECount to the same bitwidth as AddRec. We can truncate if 6794// MaxBECount's active bits are all <= AddRec's bit width. 6802auto RangeFromAffine = getRangeForAffineAR(
6804 ConservativeResult =
6805 ConservativeResult.intersectWith(RangeFromAffine, RangeType);
6807auto RangeFromFactoring = getRangeViaFactoring(
6809 ConservativeResult =
6810 ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
6814// Now try symbolic BE count and more powerful methods. 6816constSCEV *SymbolicMaxBECount =
6818if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
6821auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
6822 AddRec, SymbolicMaxBECount,
BitWidth, SignHint);
6823 ConservativeResult =
6824 ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
6829return setRange(AddRec, SignHint, std::move(ConservativeResult));
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    Intrinsic::ID ID;
    switch (S->getSCEVType()) {
    case scUMaxExpr:
      ID = Intrinsic::umax;
      break;
    case scSMaxExpr:
      ID = Intrinsic::smax;
      break;
    case scUMinExpr:
    case scSequentialUMinExpr:
      ID = Intrinsic::umin;
      break;
    case scSMinExpr:
      ID = Intrinsic::smin;
      break;
    default:
      llvm_unreachable("Unknown min/max SCEV type!");
    }

    const auto *NAry = cast<SCEVNAryExpr>(S);
    ConstantRange X = getRangeRef(NAry->getOperand(0), SignHint, Depth + 1);
    for (unsigned i = 1, e = NAry->getNumOperands(); i != e; ++i)
      X = ConstantRange::intrinsic(
          ID, {X, getRangeRef(NAry->getOperand(i), SignHint, Depth + 1)});
    return setRange(S, SignHint,
                    ConservativeResult.intersectWith(X, RangeType));
  }
6867// Check if the IR explicitly contains !range metadata. 6870 ConservativeResult =
6871 ConservativeResult.intersectWith(*MDRange, RangeType);
6873// Use facts about recurrences in the underlying IR. Note that add 6874// recurrences are AddRecExprs and thus don't hit this path. This 6875// primarily handles shift recurrences. 6876auto CR = getRangeForUnknownRecurrence(U);
6877 ConservativeResult = ConservativeResult.intersectWith(CR);
6879// See if ValueTracking can give us a useful range. 6885// ValueTracking may be able to compute a tighter result for the number of 6886// sign bits than for the value of those sign bits. 6888if (
U->getType()->isPointerTy()) {
6889// If the pointer size is larger than the index size type, this can cause 6890// NS to be larger than BitWidth. So compensate for this. 6891unsigned ptrSize =
DL.getPointerTypeSizeInBits(
U->getType());
6893if (ptrIdxDiff > 0 && ptrSize >
BitWidth && NS > (
unsigned)ptrIdxDiff)
6898// If we know any of the sign bits, we know all of the sign bits. 6906 ConservativeResult = ConservativeResult.intersectWith(
6910 ConservativeResult = ConservativeResult.intersectWith(
6915if (
U->getType()->isPointerTy() && SignHint == HINT_RANGE_UNSIGNED) {
6916// Strengthen the range if the underlying IR value is a 6917// global/alloca/heap allocation using the size of the object. 6918bool CanBeNull, CanBeFreed;
6920V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
6922// The highest address the object can start is DerefBytes bytes before 6923// the end (unsigned max value). If this value is not a multiple of the 6924// alignment, the last possible start value is the next lowest multiple 6925// of the alignment. Note: The computations below cannot overflow, 6926// because if they would there's no possible start address for the 6936 ConservativeResult = ConservativeResult.intersectWith(
6941// A range of Phi is a subset of union of all ranges of its input. 6942if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
6943// Make sure that we do not run over cycled Phis. 6944if (PendingPhiRanges.insert(Phi).second) {
6947for (
constauto &
Op :
Phi->operands()) {
6949 RangeFromOps = RangeFromOps.unionWith(OpRange);
6950// No point to continue if we already have a full set. 6951if (RangeFromOps.isFullSet())
6954 ConservativeResult =
6955 ConservativeResult.intersectWith(RangeFromOps, RangeType);
6956bool Erased = PendingPhiRanges.erase(Phi);
6957assert(Erased &&
"Failed to erase Phi properly?");
    // vscale can't be equal to zero
    if (const auto *II = dyn_cast<IntrinsicInst>(V))
      if (II->getIntrinsicID() == Intrinsic::vscale) {
        ConstantRange Disallowed = APInt::getZero(BitWidth);
        ConservativeResult = ConservativeResult.difference(Disallowed);
      }

    return setRange(U, SignHint, std::move(ConservativeResult));
  }
  default:
    break;
  }

  return setRange(S, SignHint, std::move(ConservativeResult));
}
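// Illustrative sketch (not from the source) for the affine-range helper that
// follows: with StartRange = [0, 10), Step = 2 and MaxBECount = 3, the value
// can grow by at most Offset = 2 * 3 = 6, so the resulting range is
// [0, 9 + 6 + 1) = [0, 16), provided the moved upper bound does not wrap back
// into the start range.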
// Given a StartRange, Step and MaxBECount for an expression compute a range of
// values that the expression can take. Initially, the expression has a value
// from StartRange and then is changed by Step up to MaxBECount times. Signed
// argument defines if we treat Step as signed or unsigned.
static ConstantRange getRangeForAffineARHelper(APInt Step,
                                               const ConstantRange &StartRange,
                                               const APInt &MaxBECount,
                                               bool Signed) {
  unsigned BitWidth = Step.getBitWidth();
  assert(BitWidth == StartRange.getBitWidth() &&
         BitWidth == MaxBECount.getBitWidth() && "mismatched bit widths");
  // If either Step or MaxBECount is 0, then the expression won't change, and
  // we just need to return the initial range.
  if (Step == 0 || MaxBECount == 0)
    return StartRange;

  // If we don't know anything about the initial value (i.e. StartRange is
  // FullRange), then we don't know anything about the final range either.
  // Return FullRange.
  if (StartRange.isFullSet())
    return ConstantRange::getFull(BitWidth);

  // If Step is signed and negative, then we use its absolute value, but we
  // also note that we're moving in the opposite direction.
  bool Descending = Signed && Step.isNegative();

  if (Signed)
    // This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
    // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
    // This equations hold true due to the well-defined wrap-around behavior of
    // APInt.
    Step = Step.abs();

  // Check if Offset is more than full span of BitWidth. If it is, the
  // expression is guaranteed to overflow.
  if (APInt::getMaxValue(BitWidth).udiv(Step).ult(MaxBECount))
    return ConstantRange::getFull(BitWidth);

  // Offset is by how much the expression can change. Checks above guarantee no
  // overflow here.
  APInt Offset = Step * MaxBECount;

  // Minimum value of the final range will match the minimal value of
  // StartRange if the expression is increasing and will be decreased by Offset
  // otherwise. Maximum value of the final range will match the maximal value
  // of StartRange if the expression is decreasing and will be increased by
  // Offset otherwise.
  APInt StartLower = StartRange.getLower();
  APInt StartUpper = StartRange.getUpper() - 1;
  APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
                                   : (StartUpper + std::move(Offset));

  // It's possible that the new minimum/maximum value will fall into the
  // initial range (due to wrap around). This means that the expression can
  // take any value in this bitwidth, and we have to return full range.
  if (StartRange.contains(MovedBoundary))
    return ConstantRange::getFull(BitWidth);

  APInt NewLower =
      Descending ? std::move(MovedBoundary) : std::move(StartLower);
  APInt NewUpper =
      Descending ? std::move(StartUpper) : std::move(MovedBoundary);
  NewUpper += 1;

  // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
  return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
}

ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
                                                   const SCEV *Step,
                                                   const APInt &MaxBECount) {
7051"mismatched bit widths");
7053// First, consider step signed. 7057// If Step can be both positive and negative, we need to find ranges for the 7058// maximum absolute step values in both directions and union them. 7060 StepSRange.
getSignedMin(), StartSRange, MaxBECount,
/* Signed = */true);
7062 StartSRange, MaxBECount,
7063/* Signed = */true));
7065// Next, consider step unsigned. 7068/* Signed = */false);
7070// Finally, intersect signed and unsigned ranges. 7074ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
7076 ScalarEvolution::RangeSignHint SignHint) {
  assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!\n");
  assert(AddRec->hasNoSelfWrap() &&
         "This only works for non-self-wrapping AddRecs!");
7080constbool IsSigned = SignHint == HINT_RANGE_SIGNED;
7082// Only deal with constant step to save compile time. 7083if (!isa<SCEVConstant>(Step))
7084return ConstantRange::getFull(
BitWidth);
7085// Let's make sure that we can prove that we do not self-wrap during 7086// MaxBECount iterations. We need this because MaxBECount is a maximum 7087// iteration count estimate, and we might infer nw from some exit for which we 7088// do not know max exit count (or any other side reasoning). 7089// TODO: Turn into assert at some point. 7092return ConstantRange::getFull(
BitWidth);
7098 MaxItersWithoutWrap))
7099return ConstantRange::getFull(
BitWidth);
7107// We know that there is no self-wrap. Let's take Start and End values and 7108// look at all intermediate values V1, V2, ..., Vn that IndVar takes during 7109// the iteration. They either lie inside the range [Min(Start, End), 7110// Max(Start, End)] or outside it: 7112// Case 1: RangeMin ... Start V1 ... VN End ... RangeMax; 7113// Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax; 7115// No self wrap flag guarantees that the intermediate values cannot be BOTH 7116// outside and inside the range [Min(Start, End), Max(Start, End)]. Using that 7117// knowledge, let's try to prove that we are dealing with Case 1. It is so if 7118// Start <= End and step is positive, or Start >= End and step is negative. 7123// If they already cover full iteration space, we will know nothing useful 7124// even if we prove what we want to prove. 7127// Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax). 7131return ConstantRange::getFull(
BitWidth);
7134 isKnownPredicateViaConstantRanges(LEPred, Start,
End))
7137 isKnownPredicateViaConstantRanges(GEPred, Start,
End))
7139return ConstantRange::getFull(
BitWidth);
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                    const SCEV *Step,
                                                    const APInt &MaxBECount) {
  //    RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
  // == RangeOf({A,+,P}) union RangeOf({B,+,Q})

  unsigned BitWidth = getTypeSizeInBits(Start->getType());
  assert(BitWidth == MaxBECount.getBitWidth() && "mismatched bit widths");

  struct SelectPattern {
    Value *Condition = nullptr;
    APInt TrueValue;
    APInt FalseValue;

    explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
                           const SCEV *S) {
      std::optional<unsigned> CastOp;
      APInt Offset(BitWidth, 0);

      assert(SE.getTypeSizeInBits(S->getType()) == BitWidth && "Should be!");

      // Peel off a constant offset:
      if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
        // In the future we could consider being smarter here and handle
        // {Start+Step,+,Step} too.
        if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
          return;

        Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
        S = SA->getOperand(1);
      }

      // Peel off a cast operation
      if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) {
        CastOp = SCast->getSCEVType();
        S = SCast->getOperand();
      }

      using namespace llvm::PatternMatch;

      auto *SU = dyn_cast<SCEVUnknown>(S);
      const APInt *TrueVal, *FalseVal;
      if (!SU ||
          !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
                                          m_APInt(FalseVal)))) {
        Condition = nullptr;
        return;
      }

      TrueValue = *TrueVal;
      FalseValue = *FalseVal;

      // Re-apply the cast we peeled off earlier
      if (CastOp)
        switch (*CastOp) {
        default:
          llvm_unreachable("Unknown SCEV cast type!");
        case scTruncate:
          TrueValue = TrueValue.trunc(BitWidth);
          FalseValue = FalseValue.trunc(BitWidth);
          break;
        case scZeroExtend:
          TrueValue = TrueValue.zext(BitWidth);
          FalseValue = FalseValue.zext(BitWidth);
          break;
        case scSignExtend:
          TrueValue = TrueValue.sext(BitWidth);
          FalseValue = FalseValue.sext(BitWidth);
          break;
        }

      // Re-apply the constant offset we peeled off earlier
      TrueValue += Offset;
      FalseValue += Offset;
    }

    bool isRecognized() { return Condition != nullptr; }
  };
  SelectPattern StartPattern(*this, BitWidth, Start);
  if (!StartPattern.isRecognized())
    return ConstantRange::getFull(BitWidth);

  SelectPattern StepPattern(*this, BitWidth, Step);
  if (!StepPattern.isRecognized())
    return ConstantRange::getFull(BitWidth);

  if (StartPattern.Condition != StepPattern.Condition) {
    // We don't handle this case today; but we could, by considering four
    // possibilities below instead of two. I'm not sure if there are cases
    // where that will help over what getRange already does, though.
    return ConstantRange::getFull(BitWidth);
  }

  // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
  // construct arbitrary general SCEV expressions here.  This function is
  // called from deep in the call stack, and calling getSCEV (on a sext
  // instruction, say) can end up caching a suboptimal value.

  // FIXME: without the explicit `this` receiver below, MSVC errors out with
  // C2352 and C2512 (otherwise it isn't needed).

  const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue);
  const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue);
  const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue);
  const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);

  ConstantRange TrueRange =
      this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount);
  ConstantRange FalseRange =
      this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount);

  return TrueRange.unionWith(FalseRange);
}
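// Illustrative sketch (not from the source): for {%c ? 0 : 100,+,%c ? 1 : 2}
// with MaxBECount = 10, the factored ranges are RangeOf({0,+,1}) = [0, 11)
// and RangeOf({100,+,2}) = [100, 121), and the result is their union.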
7265// Return early if there are no flags to propagate to the SCEV. 7278ScalarEvolution::getNonTrivialDefiningScopeBound(
constSCEV *S) {
7279if (
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
7281if (
auto *U = dyn_cast<SCEVUnknown>(S))
7282if (
auto *
I = dyn_cast<Instruction>(
U->getValue()))
7291// Do a bounded search of the def relation of the requested SCEVs. 7294auto pushOp = [&](
constSCEV *S) {
7295if (!Visited.
insert(S).second)
7297// Threshold of 30 here is arbitrary. 7298if (Visited.
size() > 30) {
7305for (
constauto *S : Ops)
7309while (!Worklist.
empty()) {
7311if (
auto *DefI = getNonTrivialDefiningScopeBound(S)) {
7325return getDefiningScopeBound(Ops, Discard);
7328bool ScalarEvolution::isGuaranteedToTransferExecutionTo(
constInstruction *
A,
7330if (
A->getParent() ==
B->getParent() &&
7336if (BLoop && BLoop->getHeader() ==
B->getParent() &&
7337 BLoop->getLoopPreheader() ==
A->getParent() &&
7339A->getParent()->end()) &&
bool ScalarEvolution::isGuaranteedNotToBePoison(const SCEV *Op) {
  SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ true);
  visitAll(Op, PC);
  return PC.MaybePoison.empty();
}
bool ScalarEvolution::isGuaranteedNotToCauseUB(const SCEV *Op) {
  return !SCEVExprContains(Op, [this](const SCEV *S) {
    auto *UDiv = dyn_cast<SCEVUDivExpr>(S);
    // The UDiv may be UB if the divisor is poison or zero. Unless the divisor
    // is a non-zero constant, we have to assume the UDiv may be UB.
    return UDiv && (!isKnownNonZero(UDiv->getOperand(1)) ||
                    !isGuaranteedNotToBePoison(UDiv->getOperand(1)));
  });
}
7362bool ScalarEvolution::isSCEVExprNeverPoison(
constInstruction *
I) {
7363// Only proceed if we can prove that I does not yield poison. 7367// At this point we know that if I is executed, then it does not wrap 7368// according to at least one of NSW or NUW. If I is not executed, then we do 7369// not know if the calculation that I represents would wrap. Multiple 7370// instructions can map to the same SCEV. If we apply NSW or NUW from I to 7371// the SCEV, we must guarantee no wrapping for that SCEV also when it is 7372// derived from other instructions that map to the same SCEV. We cannot make 7373// that guarantee for cases where I is not executed. So we need to find a 7374// upper bound on the defining scope for the SCEV, and prove that I is 7375// executed every time we enter that scope. When the bounding scope is a 7376// loop (the common case), this is equivalent to proving I executes on every 7377// iteration of that loop. 7379for (
constUse &
Op :
I->operands()) {
7380// I could be an extractvalue from a call to an overflow intrinsic. 7381// TODO: We can do better here in some cases. 7385auto *DefI = getDefiningScopeBound(SCEVOps);
7386return isGuaranteedToTransferExecutionTo(DefI,
I);
bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I,
                                          const Loop *L) {
7390// If we know that \c I can never be poison period, then that's enough. 7391if (isSCEVExprNeverPoison(
I))
7394// If the loop only has one exit, then we know that, if the loop is entered, 7395// any instruction dominating that exit will be executed. If any such 7396// instruction would result in UB, the addrec cannot be poison. 7398// This is basically the same reasoning as in isSCEVExprNeverPoison(), but 7399// also handles uses outside the loop header (they just need to dominate the 7402auto *ExitingBB =
L->getExitingBlock();
7409// We start by assuming \c I, the post-inc add recurrence, is poison. Only 7410// things that are known to be poison under that assumption go on the 7415while (!Worklist.
empty()) {
7419constInstruction *PoisonUser = cast<Instruction>(
U.getUser());
7425if (KnownPoison.
insert(PoisonUser).second)
7433ScalarEvolution::LoopProperties
7434ScalarEvolution::getLoopProperties(
constLoop *L) {
7435usingLoopProperties = ScalarEvolution::LoopProperties;
7437auto Itr = LoopPropertiesCache.find(L);
7438if (Itr == LoopPropertiesCache.end()) {
7440if (
auto *SI = dyn_cast<StoreInst>(
I))
7441return !
SI->isSimple();
7443returnI->mayThrow() ||
I->mayWriteToMemory();
7446 LoopProperties LP = {
/* HasNoAbnormalExits */true,
7447/*HasNoSideEffects*/true};
7449for (
auto *BB :
L->getBlocks())
7450for (
auto &
I : *BB) {
7452 LP.HasNoAbnormalExits =
false;
7453if (HasSideEffects(&
I))
7454 LP.HasNoSideEffects =
false;
7455if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
7456break;
// We're already as pessimistic as we can get. 7459auto InsertPair = LoopPropertiesCache.insert({
L, LP});
7460assert(InsertPair.second &&
"We just checked!");
7461 Itr = InsertPair.first;
7468// A mustprogress loop without side effects must be finite. 7469// TODO: The check used here is very conservative. It's only *specific* 7470// side effects which are well defined in infinite loops. 7474constSCEV *ScalarEvolution::createSCEVIter(
Value *V) {
7475// Worklist item with a Value and a bool indicating whether all operands have 7476// been visited already. 7480 Stack.emplace_back(V,
true);
7481 Stack.emplace_back(V,
false);
7482while (!Stack.empty()) {
7483auto E = Stack.pop_back_val();
7484Value *CurV = E.getPointer();
7490constSCEV *CreatedSCEV =
nullptr;
7491// If all operands have been visited already, create the SCEV. 7493 CreatedSCEV = createSCEV(CurV);
7495// Otherwise get the operands we need to create SCEV's for before creating 7496// the SCEV for CurV. If the SCEV for CurV can be constructed trivially, 7498 CreatedSCEV = getOperandsToCreate(CurV, Ops);
7502 insertValueToMap(CurV, CreatedSCEV);
7504// Queue CurV for SCEV creation, followed by its's operands which need to 7505// be constructed first. 7506Stack.emplace_back(CurV,
true);
7521// Don't attempt to analyze instructions in blocks that aren't 7522// reachable. Such instructions don't matter, and they aren't required 7523// to obey basic rules for definitions dominating uses which this 7524// analysis depends on. 7527 }
elseif (
ConstantInt *CI = dyn_cast<ConstantInt>(V))
7529elseif (isa<GlobalAlias>(V))
7531elseif (!isa<ConstantExpr>(V))
7537bool IsConstArg = isa<ConstantInt>(BO->RHS);
7538switch (BO->Opcode) {
7539case Instruction::Add:
7540case Instruction::Mul: {
7541// For additions and multiplications, traverse add/mul chains for which we 7542// can potentially create a single SCEV, to reduce the number of 7543// get{Add,Mul}Expr calls. 7553 dyn_cast<Instruction>(V));
7555 (BO->Opcode == Instruction::Add &&
7556 (NewBO->Opcode != Instruction::Add &&
7557 NewBO->Opcode != Instruction::Sub)) ||
7558 (BO->Opcode == Instruction::Mul &&
7559 NewBO->Opcode != Instruction::Mul)) {
7563// CreateSCEV calls getNoWrapFlagsFromUB, which under certain conditions 7564// requires a SCEV for the LHS. 7565if (BO->
Op && (BO->IsNSW || BO->IsNUW)) {
7566auto *
I = dyn_cast<Instruction>(BO->
Op);
7576case Instruction::Sub:
7577case Instruction::UDiv:
7578case Instruction::URem:
7580case Instruction::AShr:
7581case Instruction::Shl:
7582case Instruction::Xor:
7586case Instruction::And:
7587case Instruction::Or:
7591case Instruction::LShr:
7603switch (
U->getOpcode()) {
7604case Instruction::Trunc:
7605case Instruction::ZExt:
7606case Instruction::SExt:
7607case Instruction::PtrToInt:
7611case Instruction::BitCast:
7618case Instruction::SDiv:
7619case Instruction::SRem:
7624case Instruction::GetElementPtr:
7625assert(cast<GEPOperator>(U)->getSourceElementType()->isSized() &&
7626"GEP source element type must be sized");
7627for (
Value *Index :
U->operands())
7631case Instruction::IntToPtr:
7634case Instruction::PHI:
7635// Keep constructing SCEVs' for phis recursively for now. 7638case Instruction::Select: {
7639// Check if U is a select that can be simplified to a SCEVUnknown. 7640auto CanSimplifyToUnknown = [
this,
U]() {
7641if (
U->getType()->isIntegerTy(1) || isa<ConstantInt>(
U->getOperand(0)))
7644auto *ICI = dyn_cast<ICmpInst>(
U->getOperand(0));
7651if (!(isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->
isZero()))
7658if (CanSimplifyToUnknown())
7661for (
Value *Inc :
U->operands())
7666case Instruction::Call:
7667case Instruction::Invoke:
7668if (
Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) {
7673if (
auto *
II = dyn_cast<IntrinsicInst>(U)) {
7674switch (
II->getIntrinsicID()) {
7678case Intrinsic::umax:
7679case Intrinsic::umin:
7680case Intrinsic::smax:
7681case Intrinsic::smin:
7682case Intrinsic::usub_sat:
7683case Intrinsic::uadd_sat:
7687case Intrinsic::start_loop_iterations:
7688case Intrinsic::annotation:
7689case Intrinsic::ptr_annotation:
7702constSCEV *ScalarEvolution::createSCEV(
Value *V) {
7707// Don't attempt to analyze instructions in blocks that aren't 7708// reachable. Such instructions don't matter, and they aren't required 7709// to obey basic rules for definitions dominating uses which this 7710// analysis depends on. 7713 }
elseif (
ConstantInt *CI = dyn_cast<ConstantInt>(V))
7715elseif (isa<GlobalAlias>(V))
7717elseif (!isa<ConstantExpr>(V))
7726switch (BO->Opcode) {
7727case Instruction::Add: {
7728// The simple thing to do would be to just call getSCEV on both operands 7729// and call getAddExpr with the result. However if we're looking at a 7730// bunch of things all added together, this can be quite inefficient, 7731// because it leads to N-1 getAddExpr calls for N ultimate operands. 7732// Instead, gather up all the operands and make a single getAddExpr call. 7733// LLVM IR canonical form means we need only traverse the left operands. 7742// If a NUW or NSW flag can be applied to the SCEV for this 7743// addition, then compute the SCEV for this addition by itself 7744// with a separate call to getAddExpr. We need to do that 7745// instead of pushing the operands of the addition onto AddOps, 7746// since the flags are only known to apply to this particular 7747// addition - they may not apply to other additions that can be 7748// formed with operands from AddOps. 7753if (BO->Opcode == Instruction::Sub)
7761if (BO->Opcode == Instruction::Sub)
7767 dyn_cast<Instruction>(V));
7768if (!NewBO || (NewBO->Opcode != Instruction::Add &&
7769 NewBO->Opcode != Instruction::Sub)) {
7779case Instruction::Mul: {
7799 dyn_cast<Instruction>(V));
7800if (!NewBO || NewBO->Opcode != Instruction::Mul) {
7809case Instruction::UDiv:
7813case Instruction::URem:
7817case Instruction::Sub: {
7820Flags = getNoWrapFlagsFromUB(BO->
Op);
7825case Instruction::And:
7826// For an expression like x&255 that merely masks off the high bits, 7827// use zext(trunc(x)) as the SCEV expression. 7828if (
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
7831if (CI->isMinusOne())
7833constAPInt &
A = CI->getValue();
7835// Instcombine's ShrinkDemandedConstant may strip bits out of 7836// constants, obscuring what would otherwise be a low-bits mask. 7837// Use computeKnownBits to compute what ShrinkDemandedConstant 7838// knew about to reconstruct a low-bits mask value. 7839unsigned LZ =
A.countl_zero();
7840unsigned TZ =
A.countr_zero();
7844 0, &AC,
nullptr, &DT);
7846APInt EffectiveMask =
7848if ((LZ != 0 || TZ != 0) && !((~
A & ~Known.
Zero) & EffectiveMask)) {
7851constSCEV *ShiftedLHS =
nullptr;
7852if (
auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
7853if (
auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
7854// For an expression like (x * 8) & 8, simplify the multiply. 7855unsigned MulZeros = OpC->getAPInt().countr_zero();
7856unsigned GCD = std::min(MulZeros, TZ);
7861auto *NewMul =
getMulExpr(MulOps, LHSMul->getNoWrapFlags());
7875// Binary `and` is a bit-wise `umin`. 7883case Instruction::Or:
7884// Binary `or` is a bit-wise `umax`. 7892case Instruction::Xor:
7893if (
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
7894// If the RHS of xor is -1, then this is a not operation. 7895if (CI->isMinusOne())
7898// Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. 7899// This is a variant of the check for xor with -1, and it handles 7900// the case where instcombine has trimmed non-demanded bits out 7901// of an xor with -1. 7902if (
auto *LBO = dyn_cast<BinaryOperator>(BO->LHS))
7903if (
ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1)))
7904if (LBO->getOpcode() == Instruction::And &&
7905 LCI->getValue() == CI->getValue())
7907 dyn_cast<SCEVZeroExtendExpr>(
getSCEV(BO->LHS))) {
7909constSCEV *Z0 =
Z->getOperand();
7913// If C is a low-bits mask, the zero extend is serving to 7914// mask off the high bits. Complement the operand and 7915// re-apply the zext. 7916if (CI->getValue().isMask(Z0TySize))
7919// If C is a single bit, it may be in the sign-bit position 7920// before the zero-extend. In this case, represent the xor 7921// using an add, which is equivalent, and re-apply the zext. 7931case Instruction::Shl:
7932// Turn shift left of a constant amount into a multiply. 7933if (
ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
7936// If the shift count is not less than the bitwidth, the result of 7937// the shift is undefined. Don't try to analyze it, because the 7938// resolution chosen here may differ from the resolution chosen in 7939// other parts of the compiler. 7943// We can safely preserve the nuw flag in all cases. It's also safe to 7944// turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation 7945// requires special handling. It can be preserved as long as we're not 7946// left shifting by bitwidth - 1. 7949auto MulFlags = getNoWrapFlagsFromUB(BO->
Op);
7963case Instruction::AShr:
7964// AShr X, C, where C is a constant. 7971// If the shift count is not less than the bitwidth, the result of 7972// the shift is undefined. Don't try to analyze it, because the 7973// resolution chosen here may differ from the resolution chosen in 7974// other parts of the compiler. 7979returngetSCEV(BO->LHS);
// shift by zero --> noop 7984Operator *
L = dyn_cast<Operator>(BO->LHS);
7985constSCEV *AddTruncateExpr =
nullptr;
7987constSCEV *AddConstant =
nullptr;
7989if (L &&
L->getOpcode() == Instruction::Add) {
7993// n, c and m are constants. 7995Operator *LShift = dyn_cast<Operator>(
L->getOperand(0));
7996ConstantInt *AddOperandCI = dyn_cast<ConstantInt>(
L->getOperand(1));
7997if (LShift && LShift->
getOpcode() == Instruction::Shl) {
8000 ShlAmtCI = dyn_cast<ConstantInt>(LShift->
getOperand(1));
8001// since we truncate to TruncTy, the AddConstant should be of the 8002// same type, so create a new Constant with type same as TruncTy. 8003// Also, the Add constant should be shifted right by AShr amount. 8006// we model the expression as sext(add(trunc(A), c << n)), since the 8007// sext(trunc) part is already handled below, we create a 8008// AddExpr(TruncExp) which will be used later. 8012 }
elseif (L &&
L->getOpcode() == Instruction::Shl) {
8015// Both n and m are constant. 8018 ShlAmtCI = dyn_cast<ConstantInt>(
L->getOperand(1));
8022if (AddTruncateExpr && ShlAmtCI) {
8023// We can merge the two given cases into a single SCEV statement, 8024// incase n = m, the mul expression will be 2^0, so it gets resolved to 8025// a simpler case. The following code handles the two cases: 8027// 1) For a two-shift sext-inreg, i.e. n = m, 8028// use sext(trunc(x)) as the SCEV expression. 8030// 2) When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV 8031// expression. We already checked that ShlAmt < BitWidth, so 8032// the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as 8033// ShlAmt - AShrAmt < Amt. 8038constSCEV *CompositeExpr =
8040if (
L->getOpcode() != Instruction::Shl)
8041 CompositeExpr =
getAddExpr(CompositeExpr, AddConstant);
8050switch (
U->getOpcode()) {
8051case Instruction::Trunc:
8054case Instruction::ZExt:
8057case Instruction::SExt:
8059 dyn_cast<Instruction>(V))) {
8060// The NSW flag of a subtract does not always survive the conversion to 8061// A + (-1)*B. By pushing sign extension onto its operands we are much 8062// more likely to preserve NSW and allow later AddRec optimisations. 8064// NOTE: This is effectively duplicating this logic from getSignExtend: 8065// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> 8066// but by that point the NSW information has potentially been lost. 8067if (BO->Opcode == Instruction::Sub && BO->IsNSW) {
8068Type *Ty =
U->getType();
8076case Instruction::BitCast:
8077// BitCasts are no-op casts so we just eliminate the cast. 8082case Instruction::PtrToInt: {
8083// Pointer to integer cast is straight-forward, so do model it. 8085Type *DstIntTy =
U->getType();
8086// But only if effective SCEV (integer) type is wide enough to represent 8087// all possible pointer values. 8089if (isa<SCEVCouldNotCompute>(IntOp))
8093case Instruction::IntToPtr:
8094// Just don't deal with inttoptr casts. 8097case Instruction::SDiv:
8098// If both operands are non-negative, this is just an udiv. 8104case Instruction::SRem:
8105// If both operands are non-negative, this is just an urem. 8111case Instruction::GetElementPtr:
8112return createNodeForGEP(cast<GEPOperator>(U));
8114case Instruction::PHI:
8115return createNodeForPHI(cast<PHINode>(U));
8117case Instruction::Select:
8118return createNodeForSelectOrPHI(U,
U->getOperand(0),
U->getOperand(1),
8121case Instruction::Call:
8122case Instruction::Invoke:
8123if (
Value *RV = cast<CallBase>(U)->getReturnedArgOperand())
8126if (
auto *
II = dyn_cast<IntrinsicInst>(U)) {
8127switch (
II->getIntrinsicID()) {
8131/*IsNSW=*/cast<ConstantInt>(
II->getArgOperand(1))->
isOne());
8132case Intrinsic::umax:
8136case Intrinsic::umin:
8140case Intrinsic::smax:
8144case Intrinsic::smin:
8148case Intrinsic::usub_sat: {
8154case Intrinsic::uadd_sat: {
8160case Intrinsic::start_loop_iterations:
8161case Intrinsic::annotation:
8162case Intrinsic::ptr_annotation:
    // A start_loop_iterations or llvm.annotation or llvm.ptr.annotation is
    // just equivalent to the first operand for SCEV purposes.
    return getSCEV(II->getArgOperand(0));
  case Intrinsic::vscale:
    return getVScale(II->getType());
8178//===----------------------------------------------------------------------===// 8179// Iteration Count Computation Code 8183if (isa<SCEVCouldNotCompute>(ExitCount))
8186auto *ExitCountType = ExitCount->
getType();
8187assert(ExitCountType->isIntegerTy());
8189 1 + ExitCountType->getScalarSizeInBits());
8196if (isa<SCEVCouldNotCompute>(ExitCount))
8202auto CanAddOneWithoutOverflow = [&]() {
8204 getRangeRef(ExitCount, RangeSignHint::HINT_RANGE_UNSIGNED);
8212// If we need to zero extend the backedge count, check if we can add one to 8213// it prior to zero extending without overflow. Provided this is safe, it 8214// allows better simplification of the +1. 8215if (EvalSize > ExitCountSize && CanAddOneWithoutOverflow())
8219// Get the total trip count from the count by adding 1. This may wrap. 8229// Guard against huge trip counts. 8233// In case of integer overflow, this returns 0, which is correct. 8245assert(ExitingBlock &&
"Must pass a non-null exiting block!");
8246assert(L->isLoopExiting(ExitingBlock) &&
8247"Exiting block must actually branch out of the loop!");
8256constauto *MaxExitCount =
8264 L->getExitingBlocks(ExitingBlocks);
8266 std::optional<unsigned> Res;
8267for (
auto *ExitingBB : ExitingBlocks) {
8271 Res = (
unsigned)std::gcd(*Res, Multiple);
8273return Res.value_or(1);
8277constSCEV *ExitCount) {
8281// Get the trip count 8285// If a trip multiple is huge (>=2^32), the trip count is still divisible by 8286// the greatest power of 2 divisor less than 2^32. 8292/// Returns the largest constant divisor of the trip count of this loop as a 8293/// normal unsigned value, if possible. This means that the actual trip count is 8294/// always a multiple of the returned value (don't forget the trip count could 8295/// very well be zero as well!). 8297/// Returns 1 if the trip count is unknown or not guaranteed to be the 8298/// multiple of a constant (which is also the case if the trip count is simply 8299/// constant, use getSmallConstantTripCount for that case), Will also return 1 8300/// if the trip count is very large (>= 2^32). 8302/// As explained in the comments for getSmallConstantTripCount, this assumes 8303/// that control exits the loop via ExitingBlock. 8307assert(ExitingBlock &&
"Must pass a non-null exiting block!");
8308assert(L->isLoopExiting(ExitingBlock) &&
8309"Exiting block must actually branch out of the loop!");
8319return getBackedgeTakenInfo(L).getExact(ExitingBlock,
this);
8321return getBackedgeTakenInfo(L).getSymbolicMax(ExitingBlock,
this);
8323return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock,
this);
8333return getPredicatedBackedgeTakenInfo(L).getExact(ExitingBlock,
this,
8336return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(ExitingBlock,
this,
8339return getPredicatedBackedgeTakenInfo(L).getConstantMax(ExitingBlock,
this,
8347return getPredicatedBackedgeTakenInfo(L).getExact(L,
this, &Preds);
8354return getBackedgeTakenInfo(L).getExact(L,
this);
8356return getBackedgeTakenInfo(L).getConstantMax(
this);
8358return getBackedgeTakenInfo(L).getSymbolicMax(L,
this);
8365return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L,
this, &Preds);
8370return getPredicatedBackedgeTakenInfo(L).getConstantMax(
this, &Preds);
8374return getBackedgeTakenInfo(L).isConstantMaxOrZero(
this);
8377/// Push PHI nodes in the header of the given loop onto the given Worklist. 8383// Push all Loop-header PHIs onto the Worklist stack. 8384for (
PHINode &PN : Header->phis())
8385if (Visited.
insert(&PN).second)
8389ScalarEvolution::BackedgeTakenInfo &
8390ScalarEvolution::getPredicatedBackedgeTakenInfo(
constLoop *L) {
8391auto &BTI = getBackedgeTakenInfo(L);
8392if (BTI.hasFullInfo())
8395auto Pair = PredicatedBackedgeTakenCounts.insert({
L, BackedgeTakenInfo()});
8398return Pair.first->second;
8400 BackedgeTakenInfo
Result =
8401 computeBackedgeTakenCount(L,
/*AllowPredicates=*/true);
8403return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
8406ScalarEvolution::BackedgeTakenInfo &
8407ScalarEvolution::getBackedgeTakenInfo(
constLoop *L) {
8408// Initially insert an invalid entry for this loop. If the insertion 8409// succeeds, proceed to actually compute a backedge-taken count and 8410// update the value. The temporary CouldNotCompute value tells SCEV 8411// code elsewhere that it shouldn't attempt to request a new 8412// backedge-taken count, which could result in infinite recursion. 8413 std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator,
bool> Pair =
8414 BackedgeTakenCounts.insert({
L, BackedgeTakenInfo()});
8416return Pair.first->second;
  // computeBackedgeTakenCount may allocate memory for its result. Inserting it
  // into the BackedgeTakenCounts map transfers ownership. Otherwise, the
  // result must be cleared in this scope.
  BackedgeTakenInfo Result = computeBackedgeTakenCount(L);

  // Now that we know more about the trip count for this loop, forget any
  // existing SCEV values for PHI nodes in this loop since they are only
  // conservative estimates made without the benefit of trip count
  // information. This invalidation is not necessary for correctness, and is
  // only done to produce more precise results.
  if (Result.hasAnyInfo()) {
    // Invalidate any expression using an addrec in this loop.
    SmallVector<const SCEV *, 8> ToForget;
    auto LoopUsersIt = LoopUsers.find(L);
    if (LoopUsersIt != LoopUsers.end())
      append_range(ToForget, LoopUsersIt->second);
    forgetMemoizedResults(ToForget);

    // Invalidate constant-evolved loop header phis.
    for (PHINode &PN : L->getHeader()->phis())
      ConstantEvolutionLoopExitValue.erase(&PN);
  }

  // Re-lookup the insert position, since the call to
  // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
  // loop), which would invalidate the iterator computed
  // earlier.
  return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
void ScalarEvolution::forgetAllLoops() {
  // This method is intended to forget all info about loops. It should
  // invalidate caches as if the following happened:
  // - The trip counts of all loops have changed arbitrarily
  // - Every llvm::Value has been updated in place to produce a different
  //   result.
  BackedgeTakenCounts.clear();
  PredicatedBackedgeTakenCounts.clear();
  BECountUsers.clear();
  LoopPropertiesCache.clear();
  ConstantEvolutionLoopExitValue.clear();
  ValueExprMap.clear();
  ValuesAtScopes.clear();
  ValuesAtScopesUsers.clear();
  LoopDispositions.clear();
  BlockDispositions.clear();
  UnsignedRanges.clear();
  SignedRanges.clear();
  ExprValueMap.clear();
  ConstantMultipleCache.clear();
  PredicatedSCEVRewrites.clear();
  FoldCacheUser.clear();
}
8474void ScalarEvolution::visitAndClearUsers(
8478while (!Worklist.
empty()) {
8480if (!
isSCEVable(
I->getType()) && !isa<WithOverflowInst>(
I))
8485if (It != ValueExprMap.
end()) {
8486 eraseValueFromMap(It->first);
8488if (
PHINode *PN = dyn_cast<PHINode>(
I))
8489 ConstantEvolutionLoopExitValue.erase(PN);
8502// Iterate over all the loops and sub-loops to drop SCEV information. 8503while (!LoopWorklist.
empty()) {
8506// Drop any stored trip count value. 8507 forgetBackedgeTakenCounts(CurrL,
/* Predicated */false);
8508 forgetBackedgeTakenCounts(CurrL,
/* Predicated */true);
8510// Drop information about predicated SCEV rewrites for this loop. 8511for (
autoI = PredicatedSCEVRewrites.begin();
8512I != PredicatedSCEVRewrites.end();) {
8513 std::pair<const SCEV *, const Loop *> Entry =
I->first;
8514if (Entry.second == CurrL)
8515 PredicatedSCEVRewrites.erase(
I++);
8520auto LoopUsersItr = LoopUsers.find(CurrL);
8521if (LoopUsersItr != LoopUsers.end()) {
8522 ToForget.
insert(ToForget.
end(), LoopUsersItr->second.begin(),
8523 LoopUsersItr->second.end());
8526// Drop information about expressions based on loop-header PHIs. 8528 visitAndClearUsers(Worklist, Visited, ToForget);
8530 LoopPropertiesCache.erase(CurrL);
8531// Forget all contained loops too, to avoid dangling entries in the 8532// ValuesAtScopes map. 8533 LoopWorklist.
append(CurrL->begin(), CurrL->end());
8535 forgetMemoizedResults(ToForget);
8546// Drop information about expressions based on loop-header PHIs. 8552 visitAndClearUsers(Worklist, Visited, ToForget);
8554 forgetMemoizedResults(ToForget);
8561// If SCEV looked through a trivial LCSSA phi node, we might have SCEV's 8562// directly using a SCEVUnknown/SCEVAddRec defined in the loop. After an 8563// extra predecessor is added, this is no longer valid. Find all Unknowns and 8564// AddRecs defined in the loop and invalidate any SCEV's making use of them. 8566structInvalidationRootCollector {
8570 InvalidationRootCollector(
Loop *L) : L(L) {}
8572bool follow(
constSCEV *S) {
8573if (
auto *SU = dyn_cast<SCEVUnknown>(S)) {
8574if (
auto *
I = dyn_cast<Instruction>(SU->getValue()))
8577 }
elseif (
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
8578if (L->contains(AddRec->
getLoop()))
8583bool isDone()
const{
returnfalse; }
8586 InvalidationRootCollector
C(L);
8588 forgetMemoizedResults(
C.Roots);
8591// Also perform the normal invalidation. 8598// Unless a specific value is passed to invalidation, completely clear both 8601 BlockDispositions.clear();
8602 LoopDispositions.clear();
8613// Invalidate the block and loop dispositions cached for S. Dispositions of 8614// S's users may change if S's disposition changes (i.e. a user may change to 8615// loop-invariant, if S changes to loop invariant), so also invalidate 8616// dispositions of S's users recursively. 8619while (!Worklist.
empty()) {
8621bool LoopDispoRemoved = LoopDispositions.erase(Curr);
8622bool BlockDispoRemoved = BlockDispositions.erase(Curr);
8623if (!LoopDispoRemoved && !BlockDispoRemoved)
8625autoUsers = SCEVUsers.find(Curr);
8626if (
Users != SCEVUsers.end())
8633/// Get the exact loop backedge taken count considering all loop exits. A 8634/// computable result can only be returned for loops with all exiting blocks 8635/// dominating the latch. howFarToZero assumes that the limit of each loop test 8636/// is never skipped. This is a valid assumption as long as the loop exits via 8637/// that test. For precise results, it is the caller's responsibility to specify 8638/// the relevant loop exiting block using getExact(ExitingBlock, SE). 8639constSCEV *ScalarEvolution::BackedgeTakenInfo::getExact(
8642// If any exits were not computable, the loop is not computable. 8643if (!isComplete() || ExitNotTaken.empty())
8647// All exiting blocks we have collected must dominate the only backedge. 8651// All exiting blocks we have gathered dominate loop's latch, so exact trip 8652// count is simply a minimum out of all these calculated exit counts. 8654for (
constauto &ENT : ExitNotTaken) {
8655constSCEV *BECount = ENT.ExactNotTaken;
8658"We should only have known counts for exiting blocks that dominate " 8666assert((Preds || ENT.hasAlwaysTruePredicate()) &&
8667"Predicate should be always true!");
8670// If an earlier exit exits on the first iteration (exit count zero), then 8671// a later poison exit count should not propagate into the result. This are 8672// exactly the semantics provided by umin_seq. 8676const ScalarEvolution::ExitNotTakenInfo *
8677ScalarEvolution::BackedgeTakenInfo::getExitNotTaken(
8680for (
constauto &ENT : ExitNotTaken)
8681if (ENT.ExitingBlock == ExitingBlock) {
8682if (ENT.hasAlwaysTruePredicate())
8684elseif (Predicates) {
8693/// getConstantMax - Get the constant max backedge taken count for the loop. 8694constSCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
8697if (!getConstantMax())
8700for (
constauto &ENT : ExitNotTaken)
8701if (!ENT.hasAlwaysTruePredicate()) {
8707assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
8708 isa<SCEVConstant>(getConstantMax())) &&
8709"No point in having a non-constant max backedge taken count!");
8710return getConstantMax();
8713constSCEV *ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(
8717// Form an expression for the maximum exit count possible for this loop. We 8718// merge the max and exact information to approximate a version of 8719// getConstantMaxBackedgeTakenCount which isn't restricted to just 8723for (
constauto &ENT : ExitNotTaken) {
8724constSCEV *ExitCount = ENT.SymbolicMaxNotTaken;
8725if (!isa<SCEVCouldNotCompute>(ExitCount)) {
8727"We should only have known counts for exiting blocks that " 8733assert((Predicates || ENT.hasAlwaysTruePredicate()) &&
8734"Predicate should be always true!");
8737if (ExitCounts.
empty())
8746bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
8748auto PredicateNotAlwaysTrue = [](
const ExitNotTakenInfo &ENT) {
8749return !ENT.hasAlwaysTruePredicate();
8751return MaxOrZero && !
any_of(ExitNotTaken, PredicateNotAlwaysTrue);
8758constSCEV *E,
constSCEV *ConstantMaxNotTaken,
8759constSCEV *SymbolicMaxNotTaken,
bool MaxOrZero,
8761 : ExactNotTaken(E), ConstantMaxNotTaken(ConstantMaxNotTaken),
8762 SymbolicMaxNotTaken(SymbolicMaxNotTaken), MaxOrZero(MaxOrZero) {
8763// If we prove the max count is zero, so is the symbolic bound. This happens 8764// in practice due to differences in a) how context sensitive we've chosen 8765// to be and b) how we reason about bounds implied by UB. 8773"Exact is not allowed to be less precise than Constant Max");
8776"Exact is not allowed to be less precise than Symbolic Max");
8779"Symbolic Max is not allowed to be less precise than Constant Max");
8782"No point in having a non-constant max backedge taken count!");
8784for (
constauto PredList : PredLists)
8785for (
constauto *
P : PredList) {
8788assert(!isa<SCEVUnionPredicate>(
P) &&
"Only add leaf predicates here!");
8793"Backedge count should be int");
8796"Max backedge count should be int");
8800constSCEV *ConstantMaxNotTaken,
8801constSCEV *SymbolicMaxNotTaken,
8804 :
ExitLimit(E, ConstantMaxNotTaken, SymbolicMaxNotTaken, MaxOrZero,
8807/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each 8808/// computable exit into a persistent ExitNotTakenInfo array. 8809ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
8811bool IsComplete,
constSCEV *ConstantMax,
bool MaxOrZero)
8812 : ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) {
8813usingEdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
8815 ExitNotTaken.reserve(ExitCounts.
size());
8816 std::transform(ExitCounts.
begin(), ExitCounts.
end(),
8817 std::back_inserter(ExitNotTaken),
8818 [&](
const EdgeExitInfo &EEI) {
8819 BasicBlock *ExitBB = EEI.first;
8820 const ExitLimit &EL = EEI.second;
8821 return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken,
8822 EL.ConstantMaxNotTaken, EL.SymbolicMaxNotTaken,
8825assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
8826 isa<SCEVConstant>(ConstantMax)) &&
8827"No point in having a non-constant max backedge taken count!");
8830/// Compute the number of times the backedge of the specified loop will execute. 8831ScalarEvolution::BackedgeTakenInfo
8832ScalarEvolution::computeBackedgeTakenCount(
constLoop *L,
8833bool AllowPredicates) {
8835 L->getExitingBlocks(ExitingBlocks);
8837usingEdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
8840bool CouldComputeBECount =
true;
8841BasicBlock *Latch = L->getLoopLatch();
// may be NULL. 8842constSCEV *MustExitMaxBECount =
nullptr;
8843constSCEV *MayExitMaxBECount =
nullptr;
8844bool MustExitMaxOrZero =
false;
8845bool IsOnlyExit = ExitingBlocks.
size() == 1;
8847// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts 8848// and compute maxBECount. 8849// Do a union of all the predicates here. 8851// We canonicalize untaken exits to br (constant), ignore them so that 8852// proving an exit untaken doesn't negatively impact our ability to reason 8853// about the loop as whole. 8854if (
auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
8855if (
auto *CI = dyn_cast<ConstantInt>(BI->
getCondition())) {
8857if (ExitIfTrue == CI->
isZero())
8861 ExitLimit EL = computeExitLimit(L, ExitBB, IsOnlyExit, AllowPredicates);
8863assert((AllowPredicates || EL.Predicates.empty()) &&
8864"Predicated exit limit when predicates are not allowed!");
8866// 1. For each exit that can be computed, add an entry to ExitCounts. 8867// CouldComputeBECount is true only if all exits can be computed. 8869 ++NumExitCountsComputed;
8871// We couldn't compute an exact value for this exit, so 8872// we won't be able to compute an exact value for the loop. 8873 CouldComputeBECount =
false;
8874// Remember exit count if either exact or symbolic is known. Because 8875// Exact always implies symbolic, only check symbolic. 8880"Exact is known but symbolic isn't?");
8881 ++NumExitCountsNotComputed;
8884// 2. Derive the loop's MaxBECount from each exit's max number of 8885// non-exiting iterations. Partition the loop exits into two kinds: 8886// LoopMustExits and LoopMayExits. 8888// If the exit dominates the loop latch, it is a LoopMustExit otherwise it 8889// is a LoopMayExit. If any computable LoopMustExit is found, then 8890// MaxBECount is the minimum EL.ConstantMaxNotTaken of computable 8891// LoopMustExits. Otherwise, MaxBECount is conservatively the maximum 8892// EL.ConstantMaxNotTaken, where CouldNotCompute is considered greater than 8894// computable EL.ConstantMaxNotTaken. 8897if (!MustExitMaxBECount) {
8898 MustExitMaxBECount = EL.ConstantMaxNotTaken;
8899 MustExitMaxOrZero = EL.MaxOrZero;
8902 EL.ConstantMaxNotTaken);
8906 MayExitMaxBECount = EL.ConstantMaxNotTaken;
8909 EL.ConstantMaxNotTaken);
8913constSCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
8915// The loop backedge will be taken the maximum or zero times if there's 8916// a single exit that must be taken the maximum or zero times. 8917bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
8919// Remember which SCEVs are used in exit limits for invalidation purposes. 8920// We only care about non-constant SCEVs here, so we can ignore 8921// EL.ConstantMaxNotTaken 8922// and MaxBECount, which must be SCEVConstant. 8923for (
constauto &Pair : ExitCounts) {
8924if (!isa<SCEVConstant>(Pair.second.ExactNotTaken))
8925 BECountUsers[Pair.second.ExactNotTaken].insert({L, AllowPredicates});
8926if (!isa<SCEVConstant>(Pair.second.SymbolicMaxNotTaken))
8927 BECountUsers[Pair.second.SymbolicMaxNotTaken].insert(
8928 {L, AllowPredicates});
8930return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
8931 MaxBECount, MaxOrZero);
8935ScalarEvolution::computeExitLimit(
constLoop *L,
BasicBlock *ExitingBlock,
8936bool IsOnlyExit,
bool AllowPredicates) {
8937assert(
L->contains(ExitingBlock) &&
"Exit count for non-loop block?");
8938// If our exiting block does not dominate the latch, then its connection with 8939// loop's exit limit may be far from trivial. 8941if (!Latch || !DT.
dominates(ExitingBlock, Latch))
8945if (
BranchInst *BI = dyn_cast<BranchInst>(Term)) {
8949"It should have one successor in loop and one exit block!");
8950// Proceed to the next level to examine the exit condition expression. 8952/*ControlsOnlyExit=*/IsOnlyExit,
8956if (
SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
8957// For switch, make sure that there is a single exit from the loop. 8960if (!
L->contains(SBB)) {
8961if (Exit)
// Multiple exit successors. 8965assert(Exit &&
"Exiting block must have at least one exit");
8966return computeExitLimitFromSingleExitSwitch(
8967 L, SI, Exit,
/*ControlsOnlyExit=*/IsOnlyExit);
8974constLoop *L,
Value *ExitCond,
bool ExitIfTrue,
bool ControlsOnlyExit,
8975bool AllowPredicates) {
8976 ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
8977return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
8978 ControlsOnlyExit, AllowPredicates);
8981std::optional<ScalarEvolution::ExitLimit>
8982ScalarEvolution::ExitLimitCache::find(
constLoop *L,
Value *ExitCond,
8983bool ExitIfTrue,
bool ControlsOnlyExit,
8984bool AllowPredicates) {
8986 (void)this->ExitIfTrue;
8987 (void)this->AllowPredicates;
8989assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
8990 this->AllowPredicates == AllowPredicates &&
8991"Variance in assumed invariant key components!");
8992auto Itr = TripCountMap.find({ExitCond, ControlsOnlyExit});
8993if (Itr == TripCountMap.end())
8998void ScalarEvolution::ExitLimitCache::insert(
constLoop *L,
Value *ExitCond,
9000bool ControlsOnlyExit,
9001bool AllowPredicates,
9002const ExitLimit &EL) {
9003assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
9004 this->AllowPredicates == AllowPredicates &&
9005"Variance in assumed invariant key components!");
9007auto InsertResult = TripCountMap.insert({{ExitCond, ControlsOnlyExit}, EL});
9008assert(InsertResult.second &&
"Expected successful insertion!");
9014 ExitLimitCacheTy &Cache,
constLoop *L,
Value *ExitCond,
bool ExitIfTrue,
9015bool ControlsOnlyExit,
bool AllowPredicates) {
9017if (
auto MaybeEL = Cache.find(L, ExitCond, ExitIfTrue, ControlsOnlyExit,
9021 ExitLimit EL = computeExitLimitFromCondImpl(
9022 Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates);
9023 Cache.insert(L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates, EL);
9028 ExitLimitCacheTy &Cache,
constLoop *L,
Value *ExitCond,
bool ExitIfTrue,
9029bool ControlsOnlyExit,
bool AllowPredicates) {
9030// Handle BinOp conditions (And, Or). 9031if (
auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
9032 Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates))
9033return *LimitFromBinOp;
9035// With an icmp, it may be feasible to compute an exact backedge-taken count. 9036// Proceed to the next level to examine the icmp. 9037if (
ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
9039 computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsOnlyExit);
9040if (EL.hasFullInfo() || !AllowPredicates)
9043// Try again, but use SCEV predicates this time. 9044return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue,
9046/*AllowPredicates=*/true);
9049// Check for a constant condition. These are normally stripped out by 9050// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to 9051// preserve the CFG and is temporarily leaving constant conditions 9053if (
ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
9055// The backedge is always taken. 9057// The backedge is never taken. 9061// If we're exiting based on the overflow flag of an x.with.overflow intrinsic 9062// with a constant step, we can form an equivalent icmp predicate and figure 9063// out how many iterations will be taken before we exit. 9079auto EL = computeExitLimitFromICmp(L, Pred, LHS,
getConstant(NewRHSC),
9080 ControlsOnlyExit, AllowPredicates);
9085// If it's not an integer or pointer comparison then compute it the hard way. 9086return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
9089std::optional<ScalarEvolution::ExitLimit>
9090ScalarEvolution::computeExitLimitFromCondFromBinOp(
9091 ExitLimitCacheTy &Cache,
constLoop *L,
Value *ExitCond,
bool ExitIfTrue,
9092bool ControlsOnlyExit,
bool AllowPredicates) {
9093// Check if the controlling expression for this loop is an And or Or. 9103// EitherMayExit is true in these two cases: 9104// br (and Op0 Op1), loop, exit 9105// br (or Op0 Op1), exit, loop 9106bool EitherMayExit = IsAnd ^ ExitIfTrue;
9107 ExitLimit EL0 = computeExitLimitFromCondCached(
9108 Cache, L, Op0, ExitIfTrue, ControlsOnlyExit && !EitherMayExit,
9110 ExitLimit EL1 = computeExitLimitFromCondCached(
9111 Cache, L, Op1, ExitIfTrue, ControlsOnlyExit && !EitherMayExit,
9114// Be robust against unsimplified IR for the form "op i1 X, NeutralElement" 9115constConstant *NeutralElement = ConstantInt::get(ExitCond->
getType(), IsAnd);
9116if (isa<ConstantInt>(Op1))
9117return Op1 == NeutralElement ? EL0 : EL1;
9118if (isa<ConstantInt>(Op0))
9119return Op0 == NeutralElement ? EL1 : EL0;
9125bool UseSequentialUMin = !isa<BinaryOperator>(ExitCond);
9126// Both conditions must be same for the loop to continue executing. 9127// Choose the less conservative count. 9134 ConstantMaxBECount = EL1.ConstantMaxNotTaken;
9136 ConstantMaxBECount = EL0.ConstantMaxNotTaken;
9139 EL1.ConstantMaxNotTaken);
9141 SymbolicMaxBECount = EL1.SymbolicMaxNotTaken;
9143 SymbolicMaxBECount = EL0.SymbolicMaxNotTaken;
9146 EL0.SymbolicMaxNotTaken, EL1.SymbolicMaxNotTaken, UseSequentialUMin);
9148// Both conditions must be same at the same time for the loop to exit. 9149// For now, be conservative. 9150if (EL0.ExactNotTaken == EL1.ExactNotTaken)
9151 BECount = EL0.ExactNotTaken;
9154// There are cases (e.g. PR26207) where computeExitLimitFromCond is able 9155// to be more aggressive when computing BECount than when computing 9156// ConstantMaxBECount. In these cases it is possible for EL0.ExactNotTaken 9158// EL1.ExactNotTaken to match, but for EL0.ConstantMaxNotTaken and 9159// EL1.ConstantMaxNotTaken to not. 9160if (isa<SCEVCouldNotCompute>(ConstantMaxBECount) &&
9161 !isa<SCEVCouldNotCompute>(BECount))
9163if (isa<SCEVCouldNotCompute>(SymbolicMaxBECount))
9164 SymbolicMaxBECount =
9165 isa<SCEVCouldNotCompute>(BECount) ? ConstantMaxBECount : BECount;
9166return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount,
false,
9171constLoop *L,
ICmpInst *ExitCond,
bool ExitIfTrue,
bool ControlsOnlyExit,
9172bool AllowPredicates) {
9173// If the condition was exit on true, convert the condition to exit on false 9184 ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsOnlyExit,
9189auto *ExhaustiveCount =
9190 computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
9192if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
9193return ExhaustiveCount;
9195return computeShiftCompareExitLimit(ExitCond->
getOperand(0),
9200bool ControlsOnlyExit,
bool AllowPredicates) {
9202// Try to evaluate any dependencies out of the loop. 9206// At this point, we would like to compute how many iterations of the 9207// loop the predicate will return true for these inputs. 9209// If there is a loop-invariant, force it into the RHS. 9216// Simplify the operands before analyzing them. 9219// If we have a comparison of a chrec against a constant, try to use value 9220// ranges to answer this query. 9221if (
constSCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
9224// Form the constant range. 9229if (!isa<SCEVCouldNotCompute>(Ret))
returnRet;
9232// If this loop must exit based on this condition (or execute undefined 9233// behaviour), see if we can improve wrap flags. This is essentially 9234// a must execute style proof. 9236// If we can prove the test sequence produced must repeat the same values 9237// on self-wrap of the IV, then we can infer that IV doesn't self wrap 9238// because if it did, we'd have an infinite (undefined) loop. 9239// TODO: We can peel off any functions which are invertible *in L*. Loop 9240// invariant terms are effectively constants for our purposes here. 9242if (
auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS))
9243 InnerLHS = ZExt->getOperand();
9247/*OrNegative=*/true)) {
9255// For a slt/ult condition with a positive step, can we prove nsw/nuw? 9256// From no-self-wrap, this follows trivially from the fact that every 9257// (un)signed-wrapped, but not self-wrapped value must be LT than the 9258// last value before (un)signed wrap. Since we know that last value 9259// didn't exit, nor will any smaller one. 9277// Convert to: while (X-Y != 0) 9280if (isa<SCEVCouldNotCompute>(LHS))
9285if (isa<SCEVCouldNotCompute>(RHS))
9288 ExitLimit EL = howFarToZero(
getMinusSCEV(LHS, RHS), L, ControlsOnlyExit,
9295// Convert to: while (X-Y == 0) 9298if (isa<SCEVCouldNotCompute>(LHS))
9303if (isa<SCEVCouldNotCompute>(RHS))
9306 ExitLimit EL = howFarToNonZero(
getMinusSCEV(LHS, RHS), L);
9307if (EL.hasAnyInfo())
return EL;
9312// Since the loop is finite, an invariant RHS cannot include the boundary 9313// value, otherwise it would loop forever. 9316// Otherwise, perform the addition in a wider type, to avoid overflow. 9317// If the LHS is an addrec with the appropriate nowrap flag, the 9318// extension will be sunk into it and the exit count can be analyzed. 9319auto *OldType = dyn_cast<IntegerType>(
LHS->
getType());
9322// Prefer doubling the bitwidth over adding a single bit to make it more 9323// likely that we use a legal type. 9339 ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsOnlyExit,
9347// Since the loop is finite, an invariant RHS cannot include the boundary 9348// value, otherwise it would loop forever. 9357 ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsOnlyExit,
9371ScalarEvolution::computeExitLimitFromSingleExitSwitch(
constLoop *L,
9374bool ControlsOnlyExit) {
9375assert(!
L->contains(ExitingBlock) &&
"Not an exiting block!");
9377// Give up if the exit is the default dest of a switch. 9378if (
Switch->getDefaultDest() == ExitingBlock)
9382"Default case must not exit the loop!");
9386// while (X != Y) --> while (X-Y != 0) 9387 ExitLimit EL = howFarToZero(
getMinusSCEV(LHS, RHS), L, ControlsOnlyExit);
9399assert(isa<SCEVConstant>(Val) &&
9400"Evaluation of SCEV at constant didn't fold correctly?");
9401return cast<SCEVConstant>(Val)->getValue();
9414constBasicBlock *Predecessor =
L->getLoopPredecessor();
9418// Return true if V is of the form "LHS `shift_op` <positive constant>". 9419// Return LHS in OutLHS and shift_opt in OutOpCode. 9420auto MatchPositiveShift =
9423using namespacePatternMatch;
9427 OutOpCode = Instruction::LShr;
9429 OutOpCode = Instruction::AShr;
9431 OutOpCode = Instruction::Shl;
9438// Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in 9441// %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] 9442// %iv.shifted = lshr i32 %iv, <positive constant> 9444// Return true on a successful match. Return the corresponding PHI node (%iv 9445// above) in PNOut and the opcode of the shift operation in OpCodeOut. 9446auto MatchShiftRecurrence =
9448 std::optional<Instruction::BinaryOps> PostShiftOpCode;
9454// If we encounter a shift instruction, "peel off" the shift operation, 9455// and remember that we did so. Later when we inspect %iv's backedge 9456// value, we will make sure that the backedge value uses the same 9459// Note: the peeled shift operation does not have to be the same 9460// instruction as the one feeding into the PHI's backedge value. We only 9461// really care about it being the same *kind* of shift instruction -- 9462// that's all that is required for our later inferences to hold. 9463if (MatchPositiveShift(LHS, V, OpC)) {
9464 PostShiftOpCode = OpC;
9469 PNOut = dyn_cast<PHINode>(LHS);
9470if (!PNOut || PNOut->getParent() !=
L->getHeader())
9473Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
9477// The backedge value for the PHI node must be a shift by a positive 9479 MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
9481// of the PHI node itself 9484// and the kind of shift should be match the kind of shift we peeled 9486 (!PostShiftOpCode || *PostShiftOpCode == OpCodeOut);
9491if (!MatchShiftRecurrence(LHS, PN, OpCode))
9496// The key rationale for this optimization is that for some kinds of shift 9497// recurrences, the value of the recurrence "stabilizes" to either 0 or -1 9498// within a finite number of iterations. If the condition guarding the 9499// backedge (in the sense that the backedge is taken if the condition is true) 9500// is false for the value the shift recurrence stabilizes to, then we know 9501// that the backedge is taken only a finite number of times. 9508case Instruction::AShr: {
9509// {K,ashr,<positive-constant>} stabilizes to signum(K) in at most 9510// bitwidth(K) iterations. 9516 StableValue = ConstantInt::get(Ty, 0);
9518 StableValue = ConstantInt::get(Ty, -1,
true);
9524case Instruction::LShr:
9525case Instruction::Shl:
9526// Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>} 9527// stabilize to 0 in at most bitwidth(K) iterations. 9528 StableValue = ConstantInt::get(cast<IntegerType>(
RHS->
getType()), 0);
9535"Otherwise cannot be an operand to a branch instruction");
9537if (
Result->isZeroValue()) {
9539constSCEV *UpperBound =
9547/// Return true if we can constant fold an instruction of the specified type, 9548/// assuming that all operands were constants. 9550if (isa<BinaryOperator>(
I) || isa<CmpInst>(
I) ||
9551 isa<SelectInst>(
I) || isa<CastInst>(
I) || isa<GetElementPtrInst>(
I) ||
9552 isa<LoadInst>(
I) || isa<ExtractValueInst>(
I))
9555if (
constCallInst *CI = dyn_cast<CallInst>(
I))
9556if (
constFunction *F = CI->getCalledFunction())
9561/// Determine whether this instruction can constant evolve within this loop 9562/// assuming its operands can all constant evolve. 9564// An instruction outside of the loop can't be derived from a loop PHI. 9565if (!L->contains(
I))
returnfalse;
9567if (isa<PHINode>(
I)) {
9568// We don't currently keep track of the control flow needed to evaluate 9569// PHIs, so we cannot handle PHIs inside of loops. 9570return L->getHeader() ==
I->getParent();
9573// If we won't be able to constant fold this expression even if the operands 9574// are constants, bail early. 9578/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by 9579/// recursing through each instruction operand until reaching a loop header phi. 9587// Otherwise, we can evaluate this instruction if all of its operands are 9588// constant or derived from a PHI node themselves. 9591if (isa<Constant>(
Op))
continue;
9596PHINode *
P = dyn_cast<PHINode>(OpInst);
9598// If this operand is already visited, reuse the prior result. 9599// We may have P != PHI if this is the deepest point at which the 9600// inconsistent paths meet. 9603// Recurse and memoize the results, whether a phi is found or not. 9604// This recursive call invalidates pointers into PHIMap. 9609returnnullptr;
// Not evolving from PHI 9611returnnullptr;
// Evolving from multiple different PHIs. 9614// This is a expression evolving from a constant PHI! 9618/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node 9619/// in the loop that V is derived from. We allow arbitrary operations along the 9620/// way, but the operands of an operation must either be constants or a value 9621/// derived from a constant PHI. If this expression does not fit with these 9622/// constraints, return null. 9627if (
PHINode *PN = dyn_cast<PHINode>(
I))
9630// Record non-constant instructions contained by the loop. 9635/// EvaluateExpression - Given an expression that passes the 9636/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node 9637/// in the loop has the value PHIVal. If we can't fold this expression for some 9638/// reason, return null. 9643// Convenient constant check, but redundant for recursive calls. 9644if (
Constant *
C = dyn_cast<Constant>(V))
returnC;
9646if (!
I)
returnnullptr;
9650// An instruction inside the loop depends on a value outside the loop that we 9651// weren't given a mapping for, or a value such as a call inside the loop. 9654// An unmapped PHI can be due to a branch or another loop inside this loop, 9655// or due to this not being the initial iteration through a loop where we 9656// couldn't compute the evolution of this particular PHI last time. 9657if (isa<PHINode>(
I))
returnnullptr;
9659 std::vector<Constant*>
Operands(
I->getNumOperands());
9661for (
unsigned i = 0, e =
I->getNumOperands(); i != e; ++i) {
9662Instruction *Operand = dyn_cast<Instruction>(
I->getOperand(i));
9664Operands[i] = dyn_cast<Constant>(
I->getOperand(i));
9670if (!
C)
returnnullptr;
9675/*AllowNonDeterministic=*/false);
9679// If every incoming value to PN except the one for BB is a specific Constant, 9680// return that, else return nullptr. 9692if (IncomingVal != CurrentVal) {
9695 IncomingVal = CurrentVal;
9702/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is 9703/// in the header of its containing loop, we know the loop executes a 9704/// constant number of times, and the PHI node is just a recurrence 9705/// involving constants, fold it. 9707ScalarEvolution::getConstantEvolutionLoopExitValue(
PHINode *PN,
9710auto [
I,
Inserted] = ConstantEvolutionLoopExitValue.try_emplace(PN);
9715returnnullptr;
// Not going to evaluate it. 9721assert(PN->
getParent() == Header &&
"Can't evaluate PHI not in loop header!");
9729 CurrentIterVals[&
PHI] = StartCST;
9731if (!CurrentIterVals.
count(PN))
9732return RetVal =
nullptr;
9736// Execute the loop symbolically to determine the exit value. 9738"BEs is <= MaxBruteForceIterations which is an 'unsigned'!");
9740unsigned NumIterations = BEs.
getZExtValue();
// must be in range 9741unsigned IterationNum = 0;
9743for (; ; ++IterationNum) {
9744if (IterationNum == NumIterations)
9745return RetVal = CurrentIterVals[PN];
// Got exit value! 9747// Compute the value of the PHIs for the next iteration. 9748// EvaluateExpression adds non-phi values to the CurrentIterVals map. 9753returnnullptr;
// Couldn't evaluate! 9754 NextIterVals[PN] = NextPHI;
9756bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
9758// Also evaluate the other PHI nodes. However, we don't get to stop if we 9759// cease to be able to evaluate one of them or if they stop evolving, 9760// because that doesn't necessarily prevent us from computing PN. 9762for (
constauto &
I : CurrentIterVals) {
9764if (!
PHI ||
PHI == PN ||
PHI->getParent() != Header)
continue;
9767// We use two distinct loops because EvaluateExpression may invalidate any 9768// iterators into CurrentIterVals. 9769for (
constauto &
I : PHIsToCompute) {
9772if (!NextPHI) {
// Not already computed. 9773Value *BEValue =
PHI->getIncomingValueForBlock(Latch);
9776if (NextPHI !=
I.second)
9777 StoppedEvolving =
false;
9780// If all entries in CurrentIterVals == NextIterVals then we can stop 9781// iterating, the loop can't continue to change. 9783return RetVal = CurrentIterVals[PN];
9785 CurrentIterVals.swap(NextIterVals);
9789constSCEV *ScalarEvolution::computeExitCountExhaustively(
constLoop *L,
9795// If the loop is canonicalized, the PHI will have exactly two entries. 9796// That's the only form we support here. 9801assert(PN->
getParent() == Header &&
"Can't evaluate PHI not in loop header!");
9804assert(Latch &&
"Should follow from NumIncomingValues == 2!");
9808 CurrentIterVals[&
PHI] = StartCST;
9810if (!CurrentIterVals.
count(PN))
9813// Okay, we find a PHI node that defines the trip count of this loop. Execute 9814// the loop symbolically to determine when the condition gets a value of 9818for (
unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
9819auto *CondVal = dyn_cast_or_null<ConstantInt>(
9822// Couldn't symbolically evaluate. 9825if (CondVal->getValue() ==
uint64_t(ExitWhen)) {
9826 ++NumBruteForceTripCountsComputed;
9830// Update all the PHI nodes for the next iteration. 9833// Create a list of which PHIs we need to compute. We want to do this before 9834// calling EvaluateExpression on them because that may invalidate iterators 9835// into CurrentIterVals. 9837for (
constauto &
I : CurrentIterVals) {
9839if (!
PHI ||
PHI->getParent() != Header)
continue;
9844if (NextPHI)
continue;
// Already computed! 9846Value *BEValue =
PHI->getIncomingValueForBlock(Latch);
9849 CurrentIterVals.swap(NextIterVals);
9852// Too many iterations were needed to evaluate. 9859// Check to see if we've folded this expression at this loop before. 9860for (
auto &LS : Values)
9862return LS.second ? LS.second : V;
9866// Otherwise compute it. 9867constSCEV *
C = computeSCEVAtScope(V, L);
9868for (
auto &LS :
reverse(ValuesAtScopes[V]))
9871if (!isa<SCEVConstant>(
C))
9872 ValuesAtScopesUsers[
C].push_back({L, V});
9878/// This builds up a Constant using the ConstantExpr interface. That way, we 9879/// will return Constants for objects which aren't represented by a 9880/// SCEVConstant, because SCEVConstant is restricted to ConstantInt. 9881/// Returns NULL if the SCEV isn't representable as a Constant. 9883switch (V->getSCEVType()) {
9889return cast<SCEVConstant>(V)->getValue();
9891return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
9916assert(!
C->getType()->isPointerTy() &&
9917"Can only have one pointer, and it must be last");
9919// The offsets have been converted to bytes. We can add bytes using 9944ScalarEvolution::getWithOperands(
constSCEV *S,
9953auto *AddRec = cast<SCEVAddRecExpr>(S);
9957returngetAddExpr(NewOps, cast<SCEVAddExpr>(S)->getNoWrapFlags());
9959returngetMulExpr(NewOps, cast<SCEVMulExpr>(S)->getNoWrapFlags());
9979constSCEV *ScalarEvolution::computeSCEVAtScope(
constSCEV *V,
constLoop *L) {
9980switch (
V->getSCEVType()) {
9985// If this is a loop recurrence for a loop that does not contain L, then we 9986// are dealing with the final value computed by the loop. 9988// First, attempt to evaluate each operand. 9989// Avoid performing the look-up in the common case where the specified 9990// expression has no loop-variant portions. 9996// Okay, at least one of these operands is loop variant but might be 9997// foldable. Build a new instance of the folded commutative expression. 10002for (++i; i !=
e; ++i)
10007 AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
10008// The addrec may be folded to a nonrecurrence, for example, if the 10009// induction variable is multiplied by zero after constant folding. Go 10010// ahead and return the folded value. 10016// If the scope is outside the addrec's loop, evaluate it by using the 10017// loop exit value of the addrec. 10019// To evaluate this recurrence, we need to know how many times the AddRec 10020// loop iterates. Compute this now. 10025// Then, evaluate the AddRec. 10044// Avoid performing the look-up in the common case where the specified 10045// expression has no loop-variant portions. 10046for (
unsigned i = 0, e = Ops.
size(); i != e; ++i) {
10048if (OpAtScope != Ops[i]) {
10049// Okay, at least one of these operands is loop variant but might be 10050// foldable. Build a new instance of the folded commutative expression. 10056for (++i; i !=
e; ++i) {
10061return getWithOperands(V, NewOps);
10064// If we got here, all operands are loop invariant. 10068// If this instruction is evolved from a constant-evolving PHI, compute the 10069// exit value from the loop without using SCEVs. 10073returnV;
// This is some other type of SCEVUnknown, just return it. 10075if (
PHINode *PN = dyn_cast<PHINode>(
I)) {
10076constLoop *CurrLoop = this->LI[
I->getParent()];
10077// Looking for loop exit value. 10080// Okay, there is no closed form solution for the PHI node. Check 10081// to see if the loop that contains it has a known backedge-taken 10082// count. If so, we may be able to force computation of the exit 10085// This trivial case can show up in some degenerate cases where 10086// the incoming IR has not yet been fully simplified. 10087if (BackedgeTakenCount->
isZero()) {
10088Value *InitValue =
nullptr;
10089bool MultipleInitValues =
false;
10095 MultipleInitValues =
true;
10100if (!MultipleInitValues && InitValue)
10103// Do we have a loop invariant value flowing around the backedge 10104// for a loop which must execute the backedge? 10105if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
10109unsigned InLoopPred =
10115if (
auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
10116// Okay, we know how many times the containing loop executes. If 10117// this is a constant evolving PHI node, get the final value at 10118// the specified iteration number. 10120 getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), CurrLoop);
10127// Okay, this is an expression that we cannot symbolically evaluate 10128// into a SCEV. Check to see if it's possible to symbolically evaluate 10129// the arguments into constants, and if so, try to constant propagate the 10130// result. This is particularly useful for computing loop exit values. 10132returnV;
// This is some other type of SCEVUnknown, just return it. 10136bool MadeImprovement =
false;
10143// If any of the operands is non-constant and if they are 10144// non-integer and non-pointer, don't even try to analyze them 10145// with scev techniques. 10151 MadeImprovement |= OrigV != OpV;
10156assert(
C->getType() ==
Op->getType() &&
"Type mismatch");
10160// Check to see if getSCEVAtScope actually made an improvement. 10161if (!MadeImprovement)
10162returnV;
// This is some other type of SCEVUnknown, just return it. 10167/*AllowNonDeterministic=*/false);
10182constSCEV *ScalarEvolution::stripInjectiveFunctions(
constSCEV *S)
const{
10184return stripInjectiveFunctions(ZExt->getOperand());
10186return stripInjectiveFunctions(SExt->getOperand());
10190/// Finds the minimum unsigned root of the following equation: 10192/// A * X = B (mod N) 10194/// where N = 2^BW and BW is the common bit width of A and B. The signedness of 10195/// A and B isn't important. 10197/// If the equation does not have a solution, SCEVCouldNotCompute is returned. 10205assert(
A != 0 &&
"A must be non-zero.");
10209// The gcd of A and N may have only one prime factor: 2. The number of 10210// trailing zeros in A is its multiplicity 10214// 2. Check if B is divisible by D. 10216// B is divisible by D if and only if the multiplicity of prime factor 2 for B 10217// is not less than multiplicity of this prime factor for D. 10219// Check if we can prove there's no remainder using URem. 10224// Try to add a predicate ensuring B is a multiple of 1 << Mult2. 10228// Avoid adding a predicate that is known to be false. 10235// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic 10238// If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent 10239// (N / D) in general. The inverse itself always fits into BW bits, though, 10240// so we immediately truncate it. 10241APInt AD =
A.lshr(Mult2).trunc(BW - Mult2);
// AD = A / D 10244// 4. Compute the minimum unsigned root of the equation: 10245// I * (B / D) mod (N / D) 10246// To simplify the computation, we factor out the divide by D: 10247// (I * B mod N) / D 10252/// For a given quadratic addrec, generate coefficients of the corresponding 10253/// quadratic equation, multiplied by a common value to ensure that they are 10255/// The returned value is a tuple { A, B, C, M, BitWidth }, where 10256/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C 10257/// were multiplied by, and BitWidth is the bit width of the original addrec 10259/// This function returns std::nullopt if the addrec coefficients are not 10260/// compile- time constants. 10261static std::optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
10268 << *AddRec <<
'\n');
10270// We currently can only solve this if the coefficients are constants. 10271if (!LC || !MC || !
NC) {
10272LLVM_DEBUG(
dbgs() << __func__ <<
": coefficients are not constant\n");
10273return std::nullopt;
10279assert(!
N.isZero() &&
"This is not a quadratic addrec");
10285// The sign-extension (as opposed to a zero-extension) here matches the 10286// extension used in SolveQuadraticEquationWrap (with the same motivation). 10287N =
N.sext(NewWidth);
10288 M = M.sext(NewWidth);
10289 L = L.sext(NewWidth);
10291// The increments are M, M+N, M+2N, ..., so the accumulated values are 10292// L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is, 10293// L+M, L+2M+N, L+3M+3N, ... 10294// After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N. 10296// The equation Acc = 0 is then 10297// L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0. 10298// In a quadratic form it becomes: 10299// N n^2 + (2M-N) n + 2L = 0. 10306 <<
"x + " <<
C <<
", coeff bw: " << NewWidth
10307 <<
", multiplied by " <<
T <<
'\n');
10311/// Helper function to compare optional APInts: 10312/// (a) if X and Y both exist, return min(X, Y), 10313/// (b) if neither X nor Y exist, return std::nullopt, 10314/// (c) if exactly one of X and Y exists, return that value. 10316 std::optional<APInt>
Y) {
10318unsigned W = std::max(
X->getBitWidth(),
Y->getBitWidth());
10321return XW.
slt(YW) ? *
X : *
Y;
10324return std::nullopt;
10328/// Helper function to truncate an optional APInt to a given BitWidth. 10329/// When solving addrec-related equations, it is preferable to return a value 10330/// that has the same bit width as the original addrec's coefficients. If the 10331/// solution fits in the original bit width, truncate it (except for i1). 10332/// Returning a value of a different bit width may inhibit some optimizations. 10334/// In general, a solution to a quadratic equation generated from an addrec 10335/// may require BW+1 bits, where BW is the bit width of the addrec's 10336/// coefficients. The reason is that the coefficients of the quadratic 10337/// equation are BW+1 bits wide (to avoid truncation when converting from 10338/// the addrec to the equation). 10342return std::nullopt;
10343unsigned W =
X->getBitWidth();
10349/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n 10350/// iterations. The values L, M, N are assumed to be signed, and they 10351/// should all have the same bit widths. 10352/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW, 10353/// where BW is the bit width of the addrec's coefficients. 10354/// If the calculated value is a BW-bit integer (for BW > 1), it will be 10355/// returned as such, otherwise the bit width of the returned value may 10356/// be greater than BW. 10358/// This function returns std::nullopt if 10359/// (a) the addrec coefficients are not constant, or 10360/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases 10361/// like x^2 = 5, no integer solutions exist, in other cases an integer 10362/// solution may exist, but SolveQuadraticEquationWrap may fail to find it. 10363static std::optional<APInt>
10369return std::nullopt;
10372LLVM_DEBUG(
dbgs() << __func__ <<
": solving for unsigned overflow\n");
10373 std::optional<APInt>
X =
10376return std::nullopt;
10381return std::nullopt;
10386/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n 10387/// iterations. The values M, N are assumed to be signed, and they 10388/// should all have the same bit widths. 10389/// Find the least n such that c(n) does not belong to the given range, 10390/// while c(n-1) does. 10392/// This function returns std::nullopt if 10393/// (a) the addrec coefficients are not constant, or 10394/// (b) SolveQuadraticEquationWrap was unable to find a solution for the 10395/// bounds of the range. 10396static std::optional<APInt>
10400"Starting value of addrec should be 0");
10401LLVM_DEBUG(
dbgs() << __func__ <<
": solving boundary crossing for range " 10402 <<
Range <<
", addrec " << *AddRec <<
'\n');
10403// This case is handled in getNumIterationsInRange. Here we can assume that 10404// we start in the range. 10406"Addrec's initial value should be in range");
10412return std::nullopt;
10414// Be careful about the return value: there can be two reasons for not 10415// returning an actual number. First, if no solutions to the equations 10416// were found, and second, if the solutions don't leave the given range. 10417// The first case means that the actual solution is "unknown", the second 10418// means that it's known, but not valid. If the solution is unknown, we 10419// cannot make any conclusions. 10420// Return a pair: the optional solution and a flag indicating if the 10421// solution was found. 10422auto SolveForBoundary =
10423 [&](
APInt Bound) -> std::pair<std::optional<APInt>,
bool> {
10424// Solve for signed overflow and unsigned overflow, pick the lower 10426LLVM_DEBUG(
dbgs() <<
"SolveQuadraticAddRecRange: checking boundary " 10427 << Bound <<
" (before multiplying by " << M <<
")\n");
10428 Bound *= M;
// The quadratic equation multiplier. 10430 std::optional<APInt> SO;
10433"signed overflow\n");
10437"unsigned overflow\n");
10438 std::optional<APInt> UO =
10441auto LeavesRange = [&] (
constAPInt &
X) {
10446// X should be at least 1, so X-1 is non-negative. 10454// If SolveQuadraticEquationWrap returns std::nullopt, it means that there 10455// can be a solution, but the function failed to find it. We cannot treat it 10456// as "no solution". 10458return {std::nullopt,
false};
10460// Check the smaller value first to see if it leaves the range. 10461// At this point, both SO and UO must have values. 10463if (LeavesRange(*Min))
10464return { Min,
true };
10465 std::optional<APInt> Max = Min == SO ? UO : SO;
10466if (LeavesRange(*Max))
10467return { Max,
true };
10469// Solutions were found, but were eliminated, hence the "true". 10470return {std::nullopt,
true};
10474// Lower bound is inclusive, subtract 1 to represent the exiting value. 10477auto SL = SolveForBoundary(
Lower);
10478auto SU = SolveForBoundary(
Upper);
10479// If any of the solutions was unknown, no meaninigful conclusions can 10481if (!SL.second || !SU.second)
10482return std::nullopt;
10484// Claim: The correct solution is not some value between Min and Max. 10486// Justification: Assuming that Min and Max are different values, one of 10487// them is when the first signed overflow happens, the other is when the 10488// first unsigned overflow happens. Crossing the range boundary is only 10489// possible via an overflow (treating 0 as a special case of it, modeling 10490// an overflow as crossing k*2^W for some k). 10492// The interesting case here is when Min was eliminated as an invalid 10493// solution, but Max was not. The argument is that if there was another 10494// overflow between Min and Max, it would also have been eliminated if 10495// it was considered. 10497// For a given boundary, it is possible to have two overflows of the same 10498// type (signed/unsigned) without having the other type in between: this 10499// can happen when the vertex of the parabola is between the iterations 10500// corresponding to the overflows. This is only possible when the two 10501// overflows cross k*2^W for the same k. In such case, if the second one 10502// left the range (and was the first one to do so), the first overflow 10503// would have to enter the range, which would mean that either we had left 10504// the range before or that we started outside of it. Both of these cases 10505// are contradictions. 10507// Claim: In the case where SolveForBoundary returns std::nullopt, the correct 10508// solution is not some value between the Max for this boundary and the 10509// Min of the other boundary. 10511// Justification: Assume that we had such Max_A and Min_B corresponding 10512// to range boundaries A and B and such that Max_A < Min_B. If there was 10513// a solution between Max_A and Min_B, it would have to be caused by an 10514// overflow corresponding to either A or B. It cannot correspond to B, 10515// since Min_B is the first occurrence of such an overflow. If it 10516// corresponded to A, it would have to be either a signed or an unsigned 10517// overflow that is larger than both eliminated overflows for A. But 10518// between the eliminated overflows and this overflow, the values would 10519// cover the entire value space, thus crossing the other boundary, which 10520// is a contradiction. 10527bool ControlsOnlyExit,
10528bool AllowPredicates) {
10530// This is only used for loops with a "x != y" exit test. The exit condition 10531// is now expressed as a single expression, V = x-y. So the exit test is 10532// effectively V != 0. We know and take advantage of the fact that this 10533// expression only being used in a comparison by zero context. 10536// If the value is a constant 10538// If the value is already zero, the branch will execute zero times. 10539if (
C->getValue()->isZero())
returnC;
10544 dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V));
10546if (!AddRec && AllowPredicates)
10547// Try to make this an AddRec using runtime tests, in the first X 10548// iterations of this loop, where X is the SCEV expression found by the 10552if (!AddRec || AddRec->
getLoop() != L)
10555// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of 10556// the quadratic equation to solve it. 10558// We can only use this value if the chrec ends up with an exact zero 10559// value at this index. When solving for "X*X != 5", for example, we 10560// should not accept a root of 2. 10563return ExitLimit(R, R, R,
false, Predicates);
10568// Otherwise we can only handle this if it is affine. 10572// If this is an affine expression, the execution count of this branch is 10573// the minimum unsigned root of the following equation: 10575// Start + Step*N = 0 (mod 2^BW) 10579// Step*N = -Start (mod 2^BW) 10581// where BW is the common bit width of Start and Step. 10583// Get the initial value for the loop. 10591// Specialize step for this loop so we get context sensitive facts below. 10594// For positive steps (counting up until unsigned overflow): 10595// N = -Start/Step (as unsigned) 10596// For negative steps (counting down to zero): 10598// First compute the unsigned distance from zero in the direction of Step. 10604// Handle unitary steps, which cannot wraparound. 10605// 1*N = -Start; -1*N = Start (mod 2^BW), so: 10606// N = Distance (as unsigned) 10612// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, 10613// we end up with a loop whose backedge-taken count is n - 1. Detect this 10614// case, and see if we can improve the bound. 10616// Explicitly handling this here is necessary because getUnsignedRange 10617// isn't context-sensitive; it doesn't know that we only care about the 10618// range inside the loop. 10623// If Distance + 1 doesn't overflow, we can compute the maximum distance 10624// as "unsigned_max(Distance + 1) - 1". 10628return ExitLimit(Distance,
getConstant(MaxBECount), Distance,
false,
10632// If the condition controls loop exit (the loop exits only if the expression 10633// is true) and the addition is no-wrap we can use unsigned divide to 10634// compute the backedge count. In this case, the step may not divide the 10635// distance, but we don't care because if the condition is "missed" the loop 10636// will have undefined behavior due to wrapping. 10640// If the stride is zero, the loop must be infinite. In C++, most loops 10641// are finite by assumption, in which case the step being zero implies 10642// UB must execute if the loop is entered. 10654constSCEV *SymbolicMax =
10655 isa<SCEVCouldNotCompute>(
Exact) ? ConstantMax :
Exact;
10656return ExitLimit(
Exact, ConstantMax, SymbolicMax,
false, Predicates);
10659// Solve the general equation. 10660constSCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
10665 AllowPredicates ? &Predicates :
nullptr, *
this);
10672auto *S = isa<SCEVCouldNotCompute>(E) ?
M : E;
10673return ExitLimit(E, M, S,
false, Predicates);
10677ScalarEvolution::howFarToNonZero(
constSCEV *V,
constLoop *L) {
10678// Loops that look like: while (X == 0) are very strange indeed. We don't 10679// handle them yet except for the trivial case. This could be expanded in the 10680// future as needed. 10682// If the value is a constant, check to see if it is known to be non-zero 10683// already. If so, the backedge will execute zero times. 10685if (!
C->getValue()->isZero())
10690// We could implement others, but I really doubt anyone writes loops like 10691// this, and if they did, they would already be constant folded. 10695std::pair<const BasicBlock *, const BasicBlock *>
10696ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(
constBasicBlock *BB)
10698// If the block has a unique predecessor, then there is no path from the 10699// predecessor to the block that does not go through the direct edge 10700// from the predecessor to the block. 10704// A loop's header is defined to be a block that dominates the loop. 10705// If the header has a unique predecessor outside the loop, it must be 10706// a block that has exactly one successor that can reach the loop. 10708return {
L->getLoopPredecessor(),
L->getHeader()};
10710return {
nullptr, BB};
10713/// SCEV structural equivalence is usually sufficient for testing whether two 10714/// expressions are equal, however for the purposes of looking for a condition 10715/// guarding a loop, it can be useful to be a little more general, since a 10716/// front-end may have replicated the controlling expression. 10718// Quick check to see if they are the same SCEV. 10719if (
A ==
B)
returntrue;
10722// Not all instructions that are "identical" compute the same value. For 10723// instance, two distinct alloca instructions allocating the same type are 10724// identical and do not read memory; but compute distinct values. 10725returnA->isIdenticalTo(
B) && (isa<BinaryOperator>(
A) || isa<GetElementPtrInst>(
A));
10728// Otherwise, if they're both SCEVUnknown, it's possible that they hold 10729// two different instructions with the same value. Check for this case. 10732if (
constInstruction *AI = dyn_cast<Instruction>(AU->getValue()))
10733if (
constInstruction *BI = dyn_cast<Instruction>(BU->getValue()))
10734if (ComputesEqualValues(AI, BI))
10737// Otherwise assume they may have a different value. 10743if (!
Add ||
Add->getNumOperands() != 2)
10745if (
auto *ME = dyn_cast<SCEVMulExpr>(
Add->getOperand(0));
10746 ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
10748RHS = ME->getOperand(1);
10751if (
auto *ME = dyn_cast<SCEVMulExpr>(
Add->getOperand(1));
10752 ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
10754RHS = ME->getOperand(1);
10762bool Changed =
false;
10763// Simplifies ICMP to trivial true or false by turning it into '0 == 0' or 10765auto TrivialCase = [&](
bool TriviallyTrue) {
10770// If we hit the max recursion limit bail out. 10774// Canonicalize a constant to the right side. 10776// Check for both operands constant. 10779return TrivialCase(
false);
10780return TrivialCase(
true);
10782// Otherwise swap the operands to put the constant on the right. 10788// If we're comparing an addrec with a value which is loop-invariant in the 10789// addrec's loop, put the addrec on the left. Also make a dominance check, 10790// as both operands could be addrecs loop-invariant in each other's loop. 10800// If there's a constant operand, canonicalize comparisons with boundary 10801// cases, and canonicalize *-or-equal comparisons to regular comparisons. 10803constAPInt &
RA = RC->getAPInt();
10805bool SimplifiedByConstantRange =
false;
10810return TrivialCase(
true);
10812return TrivialCase(
false);
10818// We were able to convert an inequality to an equality. 10821 Changed = SimplifiedByConstantRange =
true;
10825if (!SimplifiedByConstantRange) {
10831// Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. 10836// The "Should have been caught earlier!" messages refer to the fact 10837// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above 10838// should have fired on the corresponding cases, and canonicalized the 10839// check to trivial case. 10842assert(!
RA.isMinValue() &&
"Should have been caught earlier!");
10848assert(!
RA.isMaxValue() &&
"Should have been caught earlier!");
10854assert(!
RA.isMinSignedValue() &&
"Should have been caught earlier!");
10860assert(!
RA.isMaxSignedValue() &&
"Should have been caught earlier!");
10869// Check for obvious equality. 10872return TrivialCase(
true);
10874return TrivialCase(
false);
10877// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by 10878// adding or subtracting 1 from one of the operands. 10934// TODO: More simplifications are possible here. 10936// Recursively simplify until we either hit a recursion limit or nothing 10961// Query push down for cases where the unsigned range is 10962// less than sufficient. 10963if (
constauto *SExt = dyn_cast<SCEVSignExtendExpr>(S))
10970auto NonRecursive = [
this, OrNegative](
constSCEV *S) {
10971if (
auto *
C = dyn_cast<SCEVConstant>(S))
10972returnC->getAPInt().isPowerOf2() ||
10973 (OrNegative &&
C->getAPInt().isNegatedPowerOf2());
10975// The vscale_range indicates vscale is a power-of-two. 10976return isa<SCEVVScale>(S) && F.
hasFnAttribute(Attribute::VScaleRange);
10979if (NonRecursive(S))
10982auto *
Mul = dyn_cast<SCEVMulExpr>(S);
10988std::pair<const SCEV *, const SCEV *>
10990// Compute SCEV on entry of loop L. 10991constSCEV *Start = SCEVInitRewriter::rewrite(S, L, *
this);
10993return { Start, Start };
10994// Compute post increment SCEV for loop L. 10995constSCEV *
PostInc = SCEVPostIncRewriter::rewrite(S, L, *
this);
11002// First collect all loops. 11004 getUsedLoops(
LHS, LoopsUsed);
11005 getUsedLoops(
RHS, LoopsUsed);
11007if (LoopsUsed.
empty())
11010// Domination relationship must be a linear order on collected loops. 11012for (
constauto *L1 : LoopsUsed)
11013for (
constauto *L2 : LoopsUsed)
11015 DT.
dominates(L2->getHeader(), L1->getHeader())) &&
11016"Domination relationship is not a linear order");
11024// Get init and post increment value for LHS. 11026// if LHS contains unknown non-invariant SCEV then bail out. 11030// Get init and post increment value for RHS. 11032// if RHS contains unknown non-invariant SCEV then bail out. 11036// It is possible that init SCEV contains an invariant load but it does 11037// not dominate MDL and is not available at MDL loop entry, so we should 11043// It seems backedge guard check is faster than entry one so in some cases 11044// it can speed up whole estimation by short circuit 11046 SplitRHS.second) &&
11052// Canonicalize the inputs first. 11058if (isKnownPredicateViaSplitting(Pred,
LHS,
RHS))
11061// Otherwise see what can be done with some simple reasoning. 11062return isKnownViaNonRecursiveReasoning(Pred,
LHS,
RHS);
11072return std::nullopt;
11078// TODO: Analyze guards and assumes from Context's block. 11087if (KnownWithoutContext)
11088return KnownWithoutContext;
11095return std::nullopt;
11106std::optional<ScalarEvolution::MonotonicPredicateType>
11109auto Result = getMonotonicPredicateTypeImpl(
LHS, Pred);
11112// Verify an invariant: inverting the predicate should turn a monotonically 11113// increasing change to a monotonically decreasing one, and vice versa. 11115auto ResultSwapped =
11118assert(*ResultSwapped != *Result &&
11119"monotonicity should flip as we flip the predicate");
11126std::optional<ScalarEvolution::MonotonicPredicateType>
11127ScalarEvolution::getMonotonicPredicateTypeImpl(
constSCEVAddRecExpr *LHS,
11129// A zero step value for LHS means the induction variable is essentially a 11130// loop invariant value. We don't really depend on the predicate actually 11131// flipping from false to true (for increasing predicates, and the other way 11132// around for decreasing predicates), all we care about is that *if* the 11133// predicate changes then it only changes from false to true. 11135// A zero step value in itself is not very useful, but there may be places 11136// where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be 11137// as general as possible. 11139// Only handle LE/LT/GE/GT predicates. 11141return std::nullopt;
11145"Should be greater or less!");
11147// Check that AR does not wrap. 11149if (!
LHS->hasNoUnsignedWrap())
11150return std::nullopt;
11154"Relational predicate is either signed or unsigned!");
11155if (!
LHS->hasNoSignedWrap())
11156return std::nullopt;
11158constSCEV *Step =
LHS->getStepRecurrence(*
this);
11166return std::nullopt;
11169std::optional<ScalarEvolution::LoopInvariantPredicate>
11174// If there is a loop-invariant, force it into the RHS, otherwise bail out. 11177return std::nullopt;
11184if (!ArLHS || ArLHS->
getLoop() != L)
11185return std::nullopt;
11189return std::nullopt;
11190// If the predicate "ArLHS `Pred` RHS" monotonically increases from false to 11191// true as the loop iterates, and the backedge is control dependent on 11192// "ArLHS `Pred` RHS" == true then we can reason as follows: 11194// * if the predicate was false in the first iteration then the predicate 11195// is never evaluated again, since the loop exits without taking the 11197// * if the predicate was true in the first iteration then it will 11198// continue to be true for all future iterations since it is 11199// monotonically increasing. 11201// For both the above possibilities, we can replace the loop varying 11202// predicate with its value on the first iteration of the loop (which is 11205// A similar reasoning applies for a monotonically decreasing predicate, by 11206// replacing true with false and false with true in the above two bullets. 11215return std::nullopt;
11216// Try to prove via context. 11217// TODO: Support other cases. 11224// Given preconditions 11225// (1) ArLHS does not cross the border of positive and negative parts of 11226// range because of: 11227// - Positive step; (TODO: lift this limitation) 11228// - nuw - does not cross zero boundary; 11229// - nsw - does not cross SINT_MAX boundary; 11232// we can replace the loop variant ArLHS <u RHS condition with loop 11233// invariant Start(ArLHS) <u RHS. 11235// Because of (1) there are two options: 11236// - ArLHS is always negative. It means that ArLHS <u RHS is always false; 11237// - ArLHS is always non-negative. Because of (3) RHS is also non-negative. 11238// It means that ArLHS <s RHS <=> ArLHS <u RHS. 11239// Because of (2) ArLHS <u RHS is trivially true. 11240// All together it means that ArLHS <u RHS <=> Start(ArLHS) >=s 0. 11241// We can strengthen this to Start(ArLHS) <u RHS. 11252return std::nullopt;
11255std::optional<ScalarEvolution::LoopInvariantPredicate>
11260 Pred,
LHS,
RHS, L, CtxI, MaxIter))
11262if (
auto *
UMin = dyn_cast<SCEVUMinExpr>(MaxIter))
11263// Number of iterations expressed as UMIN isn't always great for expressing 11264// the value on the last iteration. If the straightforward approach didn't 11265// work, try the following trick: if the a predicate is invariant for X, it 11266// is also invariant for umin(X, ...). So try to find something that works 11267// among subexpressions of MaxIter expressed as umin. 11268for (
auto *
Op :
UMin->operands())
11272return std::nullopt;
11275std::optional<ScalarEvolution::LoopInvariantPredicate>
11279// Try to prove the following set of facts: 11280// - The predicate is monotonic in the iteration space. 11281// - If the check does not fail on the 1st iteration: 11282// - No overflow will happen during first MaxIter iterations; 11283// - It will not fail on the MaxIter'th iteration. 11284// If the check does fail on the 1st iteration, we leave the loop and no 11285// other checks matter. 11287// If there is a loop-invariant, force it into the RHS, otherwise bail out. 11290return std::nullopt;
11296auto *AR = dyn_cast<SCEVAddRecExpr>(
LHS);
11297if (!AR || AR->
getLoop() != L)
11298return std::nullopt;
11300// The predicate must be relational (i.e. <, <=, >=, >). 11302return std::nullopt;
11304// TODO: Support steps other than +/- 1. 11308if (Step != One && Step != MinusOne)
11309return std::nullopt;
11311// Type mismatch here means that MaxIter is potentially larger than max 11312// unsigned value in start type, which mean we cannot prove no wrap for the 11315return std::nullopt;
11317// Value of IV on suggested last iteration. 11319// Does it still meet the requirement? 11321return std::nullopt;
11322// Because step is +/- 1 and MaxIter has same type as Start (i.e. it does 11323// not exceed max unsigned value of this type), this effectively proves 11324// that there is no wrap during the iteration. To prove that there is no 11325// signed/unsigned wrap, we need to check that 11326// Start <= Last for step = 1 or Start >= Last for step = -1. 11329if (Step == MinusOne)
11333return std::nullopt;
11335// Everything is fine. 11339bool ScalarEvolution::isKnownPredicateViaConstantRanges(
CmpPredicate Pred,
11345// This code is split out from isKnownPredicate because it is called from 11346// within isLoopEntryGuardedByCond. 11350return RangeLHS.
icmp(Pred, RangeRHS);
11353// The check at the top of the function catches the case where the values are 11354// known to be equal. 11361if (CheckRanges(SL, SR))
11365if (CheckRanges(UL, UR))
11374return CheckRanges(SL, SR);
11379return CheckRanges(UL, UR);
11382bool ScalarEvolution::isKnownPredicateViaNoOverflow(
CmpPredicate Pred,
11385// Match X to (A + C1)<ExpectedFlags> and Y to (A + C2)<ExpectedFlags>, where 11386// C1 and C2 are constant integers. If either X or Y are not add expressions, 11387// consider them as X + 0 and Y + 0 respectively. C1 and C2 are returned via 11389auto MatchBinaryAddToConst = [
this](
constSCEV *
X,
constSCEV *
Y,
11392constSCEV *XNonConstOp, *XConstOp;
11393constSCEV *YNonConstOp, *YConstOp;
11397if (!splitBinaryAdd(
X, XConstOp, XNonConstOp, XFlagsPresent)) {
11400 XFlagsPresent = ExpectedFlags;
11402if (!isa<SCEVConstant>(XConstOp) ||
11403 (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
11406if (!splitBinaryAdd(
Y, YConstOp, YNonConstOp, YFlagsPresent)) {
11409 YFlagsPresent = ExpectedFlags;
11412if (!isa<SCEVConstant>(YConstOp) ||
11413 (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
11416if (YNonConstOp != XNonConstOp)
11419 OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
11420 OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
11436// (X + C1)<nsw> s<= (X + C2)<nsw> if C1 s<= C2. 11446// (X + C1)<nsw> s< (X + C2)<nsw> if C1 s< C2. 11456// (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2. 11466// (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2. 11475bool ScalarEvolution::isKnownPredicateViaSplitting(
CmpPredicate Pred,
11481// Allowing arbitrary number of activations of isKnownPredicateViaSplitting on 11482// the stack can result in exponential time complexity. 11485// If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L 11487// To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use 11488// isKnownPredicate. isKnownPredicate is more powerful, but also more 11489// expensive; and using isKnownNonNegative(RHS) is sufficient for most of the 11490// interesting cases seen in practice. We can consider "upgrading" L >= 0 to 11491// use isKnownPredicate later if needed. 11499// No need to even try if we know the module has no guards. 11507returnmatch(&
I, m_Intrinsic<Intrinsic::experimental_guard>(
11509 isImpliedCond(Pred, LHS, RHS, Condition,
false);
11513/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is 11514/// protected by a conditional between LHS and RHS. This is used to 11515/// to eliminate casts. 11520// Interpret a null as meaning no loop, where there is obviously no guard 11521// (interprocedural conditions notwithstanding). Do not bother about 11522// unreachable loops. 11528"This cannot be done on broken IR!");
11531if (isKnownViaNonRecursiveReasoning(Pred,
LHS,
RHS))
11540if (LoopContinuePredicate && LoopContinuePredicate->
isConditional() &&
11541 isImpliedCond(Pred,
LHS,
RHS,
11543 LoopContinuePredicate->
getSuccessor(0) != L->getHeader()))
11546// We don't want more than one activation of the following loops on the stack 11547// -- that can lead to O(n!) time complexity. 11548if (WalkingBEDominatingConds)
11553// See if we can exploit a trip count to prove the predicate. 11554constauto &BETakenInfo = getBackedgeTakenInfo(L);
11555constSCEV *LatchBECount = BETakenInfo.getExact(Latch,
this);
11557// We know that Latch branches back to the loop header exactly 11558// LatchBECount times. This means the backdege condition at Latch is 11559// equivalent to "{0,+,1} u< LatchBECount". 11562constSCEV *LoopCounter =
11569// Check conditions due to any @llvm.assume intrinsics. 11573auto *CI = cast<CallInst>(AssumeVH);
11577if (isImpliedCond(Pred,
LHS,
RHS, CI->getArgOperand(0),
false))
11581if (isImpliedViaGuard(Latch, Pred,
LHS,
RHS))
11584for (
DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
11585 DTN != HeaderDTN; DTN = DTN->getIDom()) {
11586assert(DTN &&
"should reach the loop header before reaching the root!");
11589if (isImpliedViaGuard(BB, Pred,
LHS,
RHS))
11597if (!ContinuePredicate || !ContinuePredicate->
isConditional())
11602// If we have an edge `E` within the loop body that dominates the only 11603// latch, the condition guarding `E` also guards the backedge. This 11604// reasoning works only for loops with a single latch. 11608// We're constructively (and conservatively) enumerating edges within the 11609// loop body that dominate the latch. The dominator tree better agree 11613if (isImpliedCond(Pred,
LHS,
RHS, Condition,
11626// Do not bother proving facts for unreachable code. 11631"This cannot be done on broken IR!");
11633// If we cannot prove strict comparison (e.g. a > b), maybe we can prove 11634// the facts (a >= b && a != b) separately. A typical situation is when the 11635// non-strict comparison is known from ranges and non-equality is known from 11636// dominating predicates. If we are proving strict comparison, we always try 11637// to prove non-equality and non-strict comparison separately. 11639constbool ProvingStrictComparison = (Pred != NonStrictPredicate);
11640bool ProvedNonStrictComparison =
false;
11641bool ProvedNonEquality =
false;
11644if (!ProvedNonStrictComparison)
11645 ProvedNonStrictComparison = Fn(NonStrictPredicate);
11646if (!ProvedNonEquality)
11648if (ProvedNonStrictComparison && ProvedNonEquality)
11653if (ProvingStrictComparison) {
11655return isKnownViaNonRecursiveReasoning(
P,
LHS,
RHS);
11657if (SplitAndProve(ProofFn))
11661// Try to prove (Pred, LHS, RHS) using isImpliedCond. 11662auto ProveViaCond = [&](
constValue *Condition,
boolInverse) {
11666if (ProvingStrictComparison) {
11670if (SplitAndProve(ProofFn))
11676// Starting at the block's predecessor, climb up the predecessor chain, as long 11677// as there are predecessors that can be found that have unique successors 11678// leading to the original block. 11681if (ContainingLoop && ContainingLoop->
getHeader() == BB)
11685for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
11686 Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
11688 dyn_cast<BranchInst>(Pair.first->getTerminator());
11697// Check conditions due to any @llvm.assume intrinsics. 11701auto *CI = cast<CallInst>(AssumeVH);
11705if (ProveViaCond(CI->getArgOperand(0),
false))
11709// Check conditions due to any @llvm.experimental.guard intrinsics. 11711 F.
getParent(), Intrinsic::experimental_guard);
11713for (
constauto *GU : GuardDecl->users())
11714if (
constauto *Guard = dyn_cast<IntrinsicInst>(GU))
11716if (ProveViaCond(Guard->getArgOperand(0),
false))
11724// Interpret a null as meaning no loop, where there is obviously no guard 11725// (interprocedural conditions notwithstanding). 11729// Both LHS and RHS must be available at loop entry. 11731"LHS is not available at Loop Entry");
11733"RHS is not available at Loop Entry");
11735if (isKnownViaNonRecursiveReasoning(Pred,
LHS,
RHS))
11745// False conditions implies anything. Do not bother analyzing it further. 11746if (FoundCondValue ==
11750if (!PendingLoopPredicates.insert(FoundCondValue).second)
11754make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
11756// Recursively handle And and Or conditions. 11757constValue *Op0, *Op1;
11760return isImpliedCond(Pred, LHS, RHS, Op0,
Inverse, CtxI) ||
11761 isImpliedCond(Pred, LHS, RHS, Op1,
Inverse, CtxI);
11764return isImpliedCond(Pred, LHS, RHS, Op0,
Inverse, CtxI) ||
11765 isImpliedCond(Pred, LHS, RHS, Op1,
Inverse, CtxI);
11768constICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
11769if (!ICI)
returnfalse;
11771// Now that we found a conditional branch that dominates the loop or controls 11772// the loop latch. Check to see if it is the comparison we are looking for. 11782return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI);
11787constSCEV *FoundLHS,
constSCEV *FoundRHS,
11789// Balance the types. 11792// For unsigned and equality predicates, try to prove that both found 11793// operands fit into narrow unsigned range. If so, try to prove facts in 11798auto *WideType = FoundLHS->
getType();
11808if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
11809 TruncFoundRHS, CtxI))
11835return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
11839bool ScalarEvolution::isImpliedCondBalancedTypes(
11844"Types should be balanced!");
11845// Canonicalize the query to match the way instcombine will have 11846// canonicalized the comparison. 11851if (FoundLHS == FoundRHS)
11854// Check to see if we can make the LHS or RHS match. 11855if (LHS == FoundRHS || RHS == FoundLHS) {
11856if (isa<SCEVConstant>(RHS)) {
11865// Check whether the found predicate is the same as the desired predicate. 11866// FIXME: use CmpPredicate::getMatching here. 11868return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
11870// Check whether swapping the found predicate makes it the same as the 11871// desired predicate. 11872// FIXME: use CmpPredicate::getMatching here. 11875// We can write the implication 11876// 0. LHS Pred RHS <- FoundLHS SwapPred FoundRHS 11877// using one of the following ways: 11878// 1. LHS Pred RHS <- FoundRHS Pred FoundLHS 11879// 2. RHS SwapPred LHS <- FoundLHS SwapPred FoundRHS 11880// 3. LHS Pred RHS <- ~FoundLHS Pred ~FoundRHS 11881// 4. ~LHS SwapPred ~RHS <- FoundLHS SwapPred FoundRHS 11882// Forms 1. and 2. require swapping the operands of one condition. Don't 11883// do this if it would break canonical constant/addrec ordering. 11884if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS))
11885return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS,
11887if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS))
11888return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, CtxI);
11890// There's no clear preference between forms 3. and 4., try both. Avoid 11891// forming getNotSCEV of pointer values as the resulting subtract is 11895 FoundLHS, FoundRHS, CtxI))
11900 isImpliedCondOperands(Pred, LHS, RHS,
getNotSCEV(FoundLHS),
11909assert(P1 != P2 &&
"Handled earlier!");
11913if (IsSignFlippedPredicate(Pred, FoundPred)) {
11914// Unsigned comparison is the same as signed comparison when both the 11915// operands are non-negative or negative. 11918return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
11919// Create local copies that we can freely swap and canonicalize our 11920// conditions to "le/lt". 11921CmpPredicate CanonicalPred = Pred, CanonicalFoundPred = FoundPred;
11922constSCEV *CanonicalLHS =
LHS, *CanonicalRHS =
RHS,
11923 *CanonicalFoundLHS = FoundLHS, *CanonicalFoundRHS = FoundRHS;
11928std::swap(CanonicalFoundLHS, CanonicalFoundRHS);
11937// x <u y && y >=s 0 --> x <s y. 11938// If we can prove the left part, the right part is also proven. 11939return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
11940 CanonicalRHS, CanonicalFoundLHS,
11941 CanonicalFoundRHS);
11944// x <s y && y <s 0 --> x <u y. 11945// If we can prove the left part, the right part is also proven. 11946return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
11947 CanonicalRHS, CanonicalFoundLHS,
11948 CanonicalFoundRHS);
11951// Check if we can make progress by sharpening ranges. 11953 (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
11956constSCEV *
V =
nullptr;
11958if (isa<SCEVConstant>(FoundLHS)) {
11959C = cast<SCEVConstant>(FoundLHS);
11962C = cast<SCEVConstant>(FoundRHS);
11966// The guarding predicate tells us that C != V. If the known range 11967// of V is [C, t), we can sharpen the range to [C + 1, t). The 11968// range we consider has to correspond to same signedness as the 11969// predicate we're interested in folding. 11974if (Min ==
C->getAPInt()) {
11975// Given (V >= Min && V != Min) we conclude V >= (Min + 1). 11976// This is true even if (Min + 1) wraps around -- in case of 11977// wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)). 11979APInt SharperMin = Min + 1;
11984// We know V `Pred` SharperMin. If this implies LHS `Pred` 11986if (isImpliedCondOperands(Pred, LHS, RHS, V,
getConstant(SharperMin),
11993// We know from the range information that (V `Pred` Min || 11994// V == Min). We know from the guarding condition that !(V 11995// == Min). This gives us 11997// V `Pred` Min || V == Min && !(V == Min) 12000// If V `Pred` Min implies LHS `Pred` RHS, we're done. 12002if (isImpliedCondOperands(Pred, LHS, RHS, V,
getConstant(Min), CtxI))
12006// `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS` and `RHS >= LHS` respectively. 12028// Check whether the actual condition is beyond sufficient. 12031if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
12035if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
12038if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS))
12041// Otherwise assume the worst. 12045bool ScalarEvolution::splitBinaryAdd(
constSCEV *Expr,
12048constauto *AE = dyn_cast<SCEVAddExpr>(Expr);
12049if (!AE || AE->getNumOperands() != 2)
12052L = AE->getOperand(0);
12053R = AE->getOperand(1);
12054Flags = AE->getNoWrapFlags();
12058std::optional<APInt>
12060// We avoid subtracting expressions here because this function is usually 12061// fairly deep in the call stack (i.e. is called many times). 12065APInt DiffMul(BW, 1);
12066// Try various simplifications to reduce the difference to a constant. Limit 12067// the number of allowed simplifications to keep compile-time low. 12068for (
unsignedI = 0;
I < 8; ++
I) {
12072// Reduce addrecs with identical steps to their start value. 12073if (isa<SCEVAddRecExpr>(
Less) && isa<SCEVAddRecExpr>(More)) {
12074constauto *LAR = cast<SCEVAddRecExpr>(
Less);
12075constauto *MAR = cast<SCEVAddRecExpr>(More);
12077if (LAR->getLoop() != MAR->getLoop())
12078return std::nullopt;
12080// We look at affine expressions only; not for correctness but to keep 12081// getStepRecurrence cheap. 12082if (!LAR->isAffine() || !MAR->isAffine())
12083return std::nullopt;
12085if (LAR->getStepRecurrence(*
this) != MAR->getStepRecurrence(*
this))
12086return std::nullopt;
12088Less = LAR->getStart();
12089 More = MAR->getStart();
12093// Try to match a common constant multiply. 12094auto MatchConstMul =
12095 [](
constSCEV *S) -> std::optional<std::pair<const SCEV *, APInt>> {
12096auto *M = dyn_cast<SCEVMulExpr>(S);
12097if (!M || M->getNumOperands() != 2 ||
12098 !isa<SCEVConstant>(M->getOperand(0)))
12099return std::nullopt;
12101 {M->getOperand(1), cast<SCEVConstant>(M->getOperand(0))->getAPInt()}};
12103if (
auto MatchedMore = MatchConstMul(More)) {
12104if (
auto MatchedLess = MatchConstMul(
Less)) {
12105if (MatchedMore->second == MatchedLess->second) {
12106 More = MatchedMore->first;
12107Less = MatchedLess->first;
12108 DiffMul *= MatchedMore->second;
12114// Try to cancel out common factors in two add expressions. 12117if (
auto *
C = dyn_cast<SCEVConstant>(S)) {
12119 Diff +=
C->getAPInt() * DiffMul;
12122 Diff -=
C->getAPInt() * DiffMul;
12125 Multiplicity[S] +=
Mul;
12127auto Decompose = [&](
constSCEV *S,
intMul) {
12128if (isa<SCEVAddExpr>(S)) {
12134 Decompose(More, 1);
12135 Decompose(
Less, -1);
12137// Check whether all the non-constants cancel out, or reduce to new 12138// More/Less values. 12139constSCEV *NewMore =
nullptr, *NewLess =
nullptr;
12140for (
constauto &[S,
Mul] : Multiplicity) {
12145return std::nullopt;
12147 }
elseif (
Mul == -1) {
12149return std::nullopt;
12152return std::nullopt;
12155// Values stayed the same, no point in trying further. 12156if (NewMore == More || NewLess ==
Less)
12157return std::nullopt;
12162// Reduced to constant. 12166// Left with variable on only one side, bail out. 12168return std::nullopt;
12171// Did not reduce to constant. 12172return std::nullopt;
12175bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
12178// Try to recognize the following pattern: 12183// FoundLHS = {Start,+,W} 12184// context_bb: // Basic block from the same loop 12185// known(Pred, FoundLHS, FoundRHS) 12187// If some predicate is known in the context of a loop, it is also known on 12188// each iteration of this loop, including the first iteration. Therefore, in 12189// this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to 12190// prove the original pred using this fact. 12194// Make sure AR varies in the context block. 12195if (
auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
12197// Make sure that context belongs to the loop and executes on 1st iteration 12198// (if it ever executes at all). 12199if (!L->contains(ContextBB) || !DT.
dominates(ContextBB, L->getLoopLatch()))
12203return isImpliedCondOperands(Pred, LHS, RHS, AR->
getStart(), FoundRHS);
12206if (
auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) {
12208// Make sure that context belongs to the loop and executes on 1st iteration 12209// (if it ever executes at all). 12210if (!L->contains(ContextBB) || !DT.
dominates(ContextBB, L->getLoopLatch()))
12214return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->
getStart());
12220bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
CmpPredicate Pred,
12223constSCEV *FoundLHS,
12224constSCEV *FoundRHS) {
12228constauto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
12232constauto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
12233if (!AddRecFoundLHS)
12236// We'd like to let SCEV reason about control dependencies, so we constrain 12237// both the inequalities to be about add recurrences on the same loop. This 12238// way we can use isLoopEntryGuardedByCond later. 12240constLoop *
L = AddRecFoundLHS->getLoop();
12241if (L != AddRecLHS->getLoop())
12244// FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1) 12246// FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C) 12249// Informal proof for (2), assuming (1) [*]: 12251// We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**] 12255// FoundLHS s< FoundRHS s< INT_MIN - C 12256// <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ] 12257// <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ] 12258// <=> (FoundLHS + INT_MIN + C + INT_MIN) s< 12259// (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ] 12260// <=> FoundLHS + C s< FoundRHS + C 12262// [*]: (1) can be proved by ruling out overflow. 12264// [**]: This can be proved by analyzing all the four possibilities: 12265// (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and 12266// (A s>= 0, B s>= 0). 12269// Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C" 12270// will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS 12271// = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS 12272// s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is 12273// neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + 12280if (!RDiff || *LDiff != *RDiff)
12283if (LDiff->isMinValue())
12286APInt FoundRHSLimit;
12289 FoundRHSLimit = -(*RDiff);
12295// Try to prove (1) or (2), as needed. 12302constSCEV *RHS,
constSCEV *FoundLHS,
12304constPHINode *LPhi =
nullptr, *RPhi =
nullptr;
12308bool Erased = PendingMerges.erase(LPhi);
12309assert(Erased &&
"Failed to erase LPhi!");
12313bool Erased = PendingMerges.erase(RPhi);
12314assert(Erased &&
"Failed to erase RPhi!");
12319// Find respective Phis and check that they are not being pending. 12320if (
constSCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS))
12321if (
auto *Phi = dyn_cast<PHINode>(LU->getValue())) {
12322if (!PendingMerges.insert(Phi).second)
12326if (
constSCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS))
12327if (
auto *Phi = dyn_cast<PHINode>(RU->getValue())) {
12328// If we detect a loop of Phi nodes being processed by this method, for 12331// %a = phi i32 [ %some1, %preheader ], [ %b, %latch ] 12332// %b = phi i32 [ %some2, %preheader ], [ %a, %latch ] 12334// we don't want to deal with a case that complex, so return conservative 12336if (!PendingMerges.insert(Phi).second)
12341// If none of LHS, RHS is a Phi, nothing to do here. 12345// If there is a SCEVUnknown Phi we are interested in, make it left. 12353assert(LPhi &&
"LPhi should definitely be a SCEVUnknown Phi!");
12357auto ProvedEasily = [&](
constSCEV *
S1,
constSCEV *S2) {
12358return isKnownViaNonRecursiveReasoning(Pred,
S1, S2) ||
12359 isImpliedCondOperandsViaRanges(Pred,
S1, S2, Pred, FoundLHS, FoundRHS) ||
12360 isImpliedViaOperations(Pred,
S1, S2, FoundLHS, FoundRHS,
Depth);
12363if (RPhi && RPhi->getParent() == LBB) {
12364// Case one: RHS is also a SCEVUnknown Phi from the same basic block. 12365// If we compare two Phis from the same block, and for each entry block 12366// the predicate is true for incoming values from this block, then the 12367// predicate is also true for the Phis. 12370constSCEV *
R =
getSCEV(RPhi->getIncomingValueForBlock(IncBB));
12371if (!ProvedEasily(L, R))
12375// Case two: RHS is also a Phi from the same basic block, and it is an 12376// AddRec. It means that there is a loop which has both AddRec and Unknown 12377// PHIs, for it we can compare incoming values of AddRec from above the loop 12378// and latch with their respective incoming values of LPhi. 12379// TODO: Generalize to handle loops with many inputs in a header. 12383auto *Predecessor = RLoop->getLoopPredecessor();
12384assert(Predecessor &&
"Loop with AddRec with no predecessor?");
12386if (!ProvedEasily(L1, RAR->
getStart()))
12388auto *Latch = RLoop->getLoopLatch();
12389assert(Latch &&
"Loop with AddRec with no latch?");
12394// In all other cases go over inputs of LHS and compare each of them to RHS, 12395// the predicate is true for (LHS, RHS) if it is true for all such pairs. 12396// At this point RHS is either a non-Phi, or it is a Phi from some block 12397// different from LBB. 12399// Check that RHS is available in this block. 12403// Make sure L does not refer to a value from a potentially previous 12404// iteration of a loop. 12407if (!ProvedEasily(L, RHS))
12414bool ScalarEvolution::isImpliedCondOperandsViaShift(
CmpPredicate Pred,
12417constSCEV *FoundLHS,
12418constSCEV *FoundRHS) {
12419// We want to imply LHS < RHS from LHS < (RHS >> shiftvalue). First, make 12420// sure that we are dealing with same LHS. 12421if (RHS == FoundRHS) {
12426if (LHS != FoundLHS)
12429auto *SUFoundRHS = dyn_cast<SCEVUnknown>(FoundRHS);
12433Value *Shiftee, *ShiftValue;
12435using namespacePatternMatch;
12436if (
match(SUFoundRHS->getValue(),
12438auto *ShifteeS =
getSCEV(Shiftee);
12439// Prove one of the following: 12440// LHS <u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <u RHS 12441// LHS <=u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <=u RHS 12442// LHS <s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0 12444// LHS <=s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0 12456bool ScalarEvolution::isImpliedCondOperands(
CmpPredicate Pred,
constSCEV *LHS,
12458constSCEV *FoundLHS,
12459constSCEV *FoundRHS,
12461if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, Pred, FoundLHS, FoundRHS))
12464if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
12467if (isImpliedCondOperandsViaShift(Pred, LHS, RHS, FoundLHS, FoundRHS))
12470if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
12474return isImpliedCondOperandsHelper(Pred, LHS, RHS,
12475 FoundLHS, FoundRHS);
12478/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? 12479template <
typename MinMaxExprType>
12481constSCEV *Candidate) {
12482const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
12492// If both sides are affine addrecs for the same loop, with equal 12493// steps, and we know the recurrences don't wrap, then we only 12494// need to check the predicate on the starting values. 12521/// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max 12535 IsMinMaxConsistingOf<SCEVSMinExpr>(
LHS,
RHS) ||
12537 IsMinMaxConsistingOf<SCEVSMaxExpr>(
RHS,
LHS);
12545// FIXME: what about umin_seq? 12546 IsMinMaxConsistingOf<SCEVUMinExpr>(
LHS,
RHS) ||
12548 IsMinMaxConsistingOf<SCEVUMaxExpr>(
RHS,
LHS);
12554bool ScalarEvolution::isImpliedViaOperations(
CmpPredicate Pred,
constSCEV *LHS,
12556constSCEV *FoundLHS,
12557constSCEV *FoundRHS,
12561"LHS and RHS have different sizes?");
12564"FoundLHS and FoundRHS have different sizes?");
12565// We want to avoid hurting the compile time with analysis of too big trees. 12569// We only want to work with GT comparison so far. 12576// For unsigned, try to reduce it to corresponding signed comparison. 12578// We can replace unsigned predicate with its signed counterpart if all 12579// involved values are non-negative. 12580// TODO: We could have better support for unsigned. 12582// Knowing that both FoundLHS and FoundRHS are non-negative, and knowing 12583// FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us 12584// use this fact to prove that LHS and RHS are non-negative. 12596auto GetOpFromSExt = [&](
constSCEV *S) {
12597if (
auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
12598returnExt->getOperand();
12599// TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off 12600// the constant in some cases. 12604// Acquire values from extensions. 12606auto *OrigFoundLHS = FoundLHS;
12607LHS = GetOpFromSExt(LHS);
12608 FoundLHS = GetOpFromSExt(FoundLHS);
12610// Is the SGT predicate can be proved trivially or using the found context. 12611auto IsSGTViaContext = [&](
constSCEV *
S1,
constSCEV *S2) {
12614 FoundRHS,
Depth + 1);
12617if (
auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
12618// We want to avoid creation of any new non-constant SCEV. Since we are 12619// going to compare the operands to RHS, we should be certain that we don't 12620// need any size extensions for this. So let's decline all cases when the 12621// sizes of types of LHS and RHS do not match. 12622// TODO: Maybe try to get RHS from sext to catch more cases? 12626// Should not overflow. 12627if (!LHSAddExpr->hasNoSignedWrap())
12630auto *LL = LHSAddExpr->getOperand(0);
12631auto *LR = LHSAddExpr->getOperand(1);
12634// Checks that S1 >= 0 && S2 > RHS, trivially or using the found context. 12635auto IsSumGreaterThanRHS = [&](
constSCEV *
S1,
constSCEV *S2) {
12636return IsSGTViaContext(
S1, MinusOne) && IsSGTViaContext(S2, RHS);
12638// Try to prove the following rule: 12639// (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS). 12640// (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS). 12641if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
12643 }
elseif (
auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
12645// FIXME: Once we have SDiv implemented, we can get rid of this matching. 12650// Rules for division. 12651// We are going to perform some comparisons with Denominator and its 12652// derivative expressions. In general case, creating a SCEV for it may 12653// lead to a complex analysis of the entire graph, and in particular it 12654// can request trip count recalculation for the same loop. This would 12655// cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid 12656// this, we only want to create SCEVs that are constants in this section. 12657// So we bail if Denominator is not a constant. 12658if (!isa<ConstantInt>(LR))
12661auto *Denominator = cast<SCEVConstant>(
getSCEV(LR));
12663// We want to make sure that LHS = FoundLHS / Denominator. If it is so, 12664// then a SCEV for the numerator already exists and matches with FoundLHS. 12666if (!Numerator || Numerator->getType() != FoundLHS->
getType())
12669// Make sure that the numerator matches with FoundLHS and the denominator 12674auto *DTy = Denominator->getType();
12675auto *FRHSTy = FoundRHS->
getType();
12676if (DTy->isPointerTy() != FRHSTy->isPointerTy())
12677// One of types is a pointer and another one is not. We cannot extend 12678// them properly to a wider type, so let us just reject this case. 12679// TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help 12680// to avoid this check. 12684// FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0. 12689// Try to prove the following rule: 12690// (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS). 12691// For example, given that FoundLHS > 2. It means that FoundLHS is at 12692// least 3. If we divide it by Denominator < 4, we will have at least 1. 12695 IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
12698// Try to prove the following rule: 12699// (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS). 12700// For example, given that FoundLHS > -3. Then FoundLHS is at least -2. 12701// If we divide it by Denominator > 2, then: 12702// 1. If FoundLHS is negative, then the result is 0. 12703// 2. If FoundLHS is non-negative, then the result is non-negative. 12704// Anyways, the result is non-negative. 12706auto *NegDenomMinusOne =
getMinusSCEV(MinusOne, DenominatorExt);
12708 IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
12713// If our expression contained SCEVUnknown Phis, and we split it down and now 12714// need to prove something for them, try to prove the predicate for every 12715// possible incoming values of those Phis. 12716if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS,
Depth + 1))
12724// zext x u<= sext x, sext x s<= zext x 12731// If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt. 12739// If operand >=u 0 then ZExt == SExt. If operand <u 0 then ZExt <u SExt. 12749bool ScalarEvolution::isKnownViaNonRecursiveReasoning(
CmpPredicate Pred,
12753 isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
12756 isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
12759bool ScalarEvolution::isImpliedCondOperandsHelper(
CmpPredicate Pred,
12762constSCEV *FoundLHS,
12763constSCEV *FoundRHS) {
12798// Maybe it can be proved via operations? 12799if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
12805bool ScalarEvolution::isImpliedCondOperandsViaRanges(
12807constSCEV *FoundLHS,
constSCEV *FoundRHS) {
12808if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
12809// The restriction on `FoundRHS` be lifted easily -- it exists only to 12810// reduce the compile time impact of this optimization. 12817constAPInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
12819// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the 12820// antecedent "`FoundLHS` `FoundPred` `FoundRHS`". 12824// Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`: 12827// We can also compute the range of values for `LHS` that satisfy the 12828// consequent, "`LHS` `Pred` `RHS`": 12829constAPInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
12830// The antecedent implies the consequent if every value of `LHS` that 12831// satisfies the antecedent also satisfies the consequent. 12832return LHSRange.
icmp(Pred, ConstRHS);
12835bool ScalarEvolution::canIVOverflowOnLT(
constSCEV *RHS,
constSCEV *Stride,
12847// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! 12848return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
12855// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! 12856return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
12859bool ScalarEvolution::canIVOverflowOnGT(
constSCEV *RHS,
constSCEV *Stride,
12870// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! 12871return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
12878// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! 12879return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
12883// umin(N, 1) + floor((N - umin(N, 1)) / D) 12884// This is equivalent to "1 + floor((N - 1) / D)" for N != 0. The umin 12885// expression fixes the case of N=0. 12891constSCEV *ScalarEvolution::computeMaxBECountForLT(
constSCEV *Start,
12896// The logic in this function assumes we can represent a positive stride. 12897// If we can't, the backedge-taken count must be zero. 12901// This code below only been closely audited for negative strides in the 12902// unsigned comparison case, it may be correct for signed comparison, but 12903// that needs to be established. 12907// Calculate the maximum backedge count based on the range of values 12908// permitted by Start, End, and Stride. 12915// We assume either the stride is positive, or the backedge-taken count 12916// is zero. So force StrideForMaxBECount to be at least one. 12919 : APIntOps::umax(One, MinStride);
12923APInt Limit = MaxValue - (StrideForMaxBECount - 1);
12925// Although End can be a MAX expression we estimate MaxEnd considering only 12926// the case End = RHS of the loop termination condition. This is safe because 12927// in the other case (End - Start) is zero, leading to a zero maximum backedge 12932// MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride) 12934 : APIntOps::umax(MaxEnd, MinStart);
12941ScalarEvolution::howManyLessThans(
constSCEV *LHS,
constSCEV *RHS,
12942constLoop *L,
bool IsSigned,
12943bool ControlsOnlyExit,
bool AllowPredicates) {
12947bool PredicatedIV =
false;
12949if (
auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) {
12950constSCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
12952auto canProveNUW = [&]() {
12953// We can use the comparison to infer no-wrap flags only if it fully 12954// controls the loop exit. 12955if (!ControlsOnlyExit)
12962// We need the sequence defined by AR to strictly increase in the 12963// unsigned integer domain for the logic below to hold. 12968// If RHS <=u Limit, then there must exist a value V in the sequence 12969// defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and 12970// V <=u UINT_MAX. Thus, we must exit the loop before unsigned 12971// overflow occurs. This limit also implies that a signed comparison 12972// (in the wide bitwidth) is equivalent to an unsigned comparison as 12973// the high bits on both sides must be zero. 12976 Limit = Limit.
zext(OuterBitWidth);
12985// Emulate what getZeroExtendExpr would have done during construction 12986// if we'd been able to infer the fact just above at that time. 12988Type *Ty = ZExt->getType();
12990 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty,
this, 0),
12992IV = dyn_cast<SCEVAddRecExpr>(S);
12999if (!
IV && AllowPredicates) {
13000// Try to make this an AddRec using runtime tests, in the first X 13001// iterations of this loop, where X is the SCEV expression found by the 13004 PredicatedIV =
true;
13007// Avoid weird loops 13008if (!
IV ||
IV->getLoop() != L || !
IV->isAffine())
13011// A precondition of this method is that the condition being analyzed 13012// reaches an exiting branch which dominates the latch. Given that, we can 13013// assume that an increment which violates the nowrap specification and 13014// produces poison must cause undefined behavior when the resulting poison 13015// value is branched upon and thus we can conclude that the backedge is 13016// taken no more often than would be required to produce that poison value. 13017// Note that a well defined loop can exit on the iteration which violates 13018// the nowrap specification if there is another exit (either explicit or 13019// implicit/exceptional) which causes the loop to execute before the 13020// exiting instruction we're analyzing would trigger UB. 13022bool NoWrap = ControlsOnlyExit &&
IV->getNoWrapFlags(WrapType);
13025constSCEV *Stride =
IV->getStepRecurrence(*
this);
13029// Avoid negative or zero stride values. 13030if (!PositiveStride) {
13031// We can compute the correct backedge taken count for loops with unknown 13032// strides if we can prove that the loop is not an infinite loop with side 13033// effects. Here's the loop structure we are trying to handle - 13039// } while (i < end); 13041// The backedge taken count for such loops is evaluated as - 13042// (max(end, start + stride) - start - 1) /u stride 13044// The additional preconditions that we need to check to prove correctness 13045// of the above formula is as follows - 13047// a) IV is either nuw or nsw depending upon signedness (indicated by the 13049// b) the loop is guaranteed to be finite (e.g. is mustprogress and has 13050// no side effects within the loop) 13051// c) loop has a single static exit (with no abnormal exits) 13053// Precondition a) implies that if the stride is negative, this is a single 13054// trip loop. The backedge taken count formula reduces to zero in this case. 13056// Precondition b) and c) combine to imply that if rhs is invariant in L, 13057// then a zero stride means the backedge can't be taken without executing 13058// undefined behavior. 13060// The positive stride case is the same as isKnownPositive(Stride) returning 13061// true (original behavior of the function). 13068// If we have a step of zero, and RHS isn't invariant in L, we don't know 13069// if it might eventually be greater than start and if so, on which 13070// iteration. We can't even produce a useful upper bound. 13074// We allow a potentially zero stride, but we need to divide by stride 13075// below. Since the loop can't be infinite and this check must control 13076// the sole exit, we can infer the exit must be taken on the first 13077// iteration (e.g. backedge count = 0) if the stride is zero. Given that, 13078// we know the numerator in the divides below must be zero, so we can 13079// pick an arbitrary non-zero value for the denominator (e.g. stride) 13080// and produce the right result. 13081// FIXME: Handle the case where Stride is poison? 13082auto wouldZeroStrideBeUB = [&]() {
13083// Proof by contradiction. Suppose the stride were zero. If we can 13084// prove that the backedge *is* taken on the first iteration, then since 13085// we know this condition controls the sole exit, we must have an 13086// infinite loop. We can't have a (well defined) infinite loop per 13087// check just above. 13088// Note: The (Start - Stride) term is used to get the start' term from 13089// (start' + stride,+,stride). Remember that we only care about the 13090// result of this expression when stride == 0 at runtime. 13094if (!wouldZeroStrideBeUB()) {
13098 }
elseif (!NoWrap) {
13099// Avoid proven overflow cases: this will ensure that the backedge taken 13100// count will not generate any unsigned overflow. 13101if (canIVOverflowOnLT(RHS, Stride, IsSigned))
13105// On all paths just preceeding, we established the following invariant: 13106// IV can be assumed not to overflow up to and including the exiting 13107// iteration. We proved this in one of two ways: 13108// 1) We can show overflow doesn't occur before the exiting iteration 13109// 1a) canIVOverflowOnLT, and b) step of one 13110// 2) We can show that if overflow occurs, the loop must execute UB 13111// before any possible exit. 13112// Note that we have not yet proved RHS invariant (in general). 13114constSCEV *Start =
IV->getStart();
13116// Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond. 13117// If we convert to integers, isLoopEntryGuardedByCond will miss some cases. 13118// Use integer-typed versions for actual computation; we can't subtract 13119// pointers in general. 13120constSCEV *OrigStart = Start;
13122if (Start->getType()->isPointerTy()) {
13124if (isa<SCEVCouldNotCompute>(Start))
13129if (isa<SCEVCouldNotCompute>(RHS))
13133constSCEV *
End =
nullptr, *BECount =
nullptr,
13134 *BECountIfBackedgeTaken =
nullptr;
13136constauto *RHSAddRec = dyn_cast<SCEVAddRecExpr>(RHS);
13137if (PositiveStride && RHSAddRec !=
nullptr && RHSAddRec->getLoop() == L &&
13138 RHSAddRec->getNoWrapFlags()) {
13139// The structure of loop we are trying to calculate backedge count of: 13141// left = left_start 13142// right = right_start 13144// while(left < right){ 13145// ... do something here ... 13146// left += s1; // stride of left is s1 (s1 > 0) 13147// right += s2; // stride of right is s2 (s2 < 0) 13151constSCEV *RHSStart = RHSAddRec->getStart();
13152constSCEV *RHSStride = RHSAddRec->getStepRecurrence(*
this);
13154// If Stride - RHSStride is positive and does not overflow, we can write 13155// backedge count as -> 13156// ceil((End - Start) /u (Stride - RHSStride)) 13157// Where, End = max(RHSStart, Start) 13159// Check if RHSStride < 0 and Stride - RHSStride will not overflow. 13169// We can do this because End >= Start, as End = max(RHSStart, Start) 13173 BECountIfBackedgeTaken =
13178if (BECount ==
nullptr) {
13179// If we cannot calculate ExactBECount, we can calculate the MaxBECount, 13180// given the start, stride and max value for the end bound of the 13181// loop (RHS), and the fact that IV does not overflow (which is 13183constSCEV *MaxBECount = computeMaxBECountForLT(
13186 MaxBECount,
false/*MaxOrZero*/, Predicates);
13189// We use the expression (max(End,Start)-Start)/Stride to describe the 13190// backedge count, as if the backedge is taken at least once 13191// max(End,Start) is End and so the result is as above, and if not 13192// max(End,Start) is Start so we get a backedge count of zero. 13193auto *OrigStartMinusStride =
getMinusSCEV(OrigStart, Stride);
13197// Can we prove (max(RHS,Start) > Start - Stride? 13200// In this case, we can use a refined formula for computing backedge 13201// taken count. The general formula remains: 13202// "End-Start /uceiling Stride" where "End = max(RHS,Start)" 13203// We want to use the alternate formula: 13204// "((End - 1) - (Start - Stride)) /u Stride" 13205// Let's do a quick case analysis to show these are equivalent under 13206// our precondition that max(RHS,Start) > Start - Stride. 13207// * For RHS <= Start, the backedge-taken count must be zero. 13208// "((End - 1) - (Start - Stride)) /u Stride" reduces to 13209// "((Start - 1) - (Start - Stride)) /u Stride" which simplies to 13210// "Stride - 1 /u Stride" which is indeed zero for all non-zero values 13211// of Stride. For 0 stride, we've use umin(1,Stride) above, 13212// reducing this to the stride of 1 case. 13213// * For RHS >= Start, the backedge count must be "RHS-Start /uceil 13215// "((End - 1) - (Start - Stride)) /u Stride" reduces to 13216// "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to 13217// "((RHS - (Start - Stride) - 1) /u Stride". 13218// Our preconditions trivially imply no overflow in that form. 13220constSCEV *Numerator =
13226auto canProveRHSGreaterThanEqualStart = [&]() {
13235// (RHS > Start - 1) implies RHS >= Start. 13236// * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if 13237// "Start - 1" doesn't overflow. 13238// * For signed comparison, if Start - 1 does overflow, it's equal 13239// to INT_MAX, and "RHS >s INT_MAX" is trivially false. 13240// * For unsigned comparison, if Start - 1 does overflow, it's equal 13241// to UINT_MAX, and "RHS >u UINT_MAX" is trivially false. 13243// FIXME: Should isLoopEntryGuardedByCond do this for us? 13245auto *StartMinusOne =
13250// If we know that RHS >= Start in the context of loop, then we know 13251// that max(RHS, Start) = RHS at this point. 13252if (canProveRHSGreaterThanEqualStart()) {
13255// If RHS < Start, the backedge will be taken zero times. So in 13256// general, we can write the backedge-taken count as: 13258// RHS >= Start ? ceil(RHS - Start) / Stride : 0 13260// We convert it to the following to make it more convenient for SCEV: 13262// ceil(max(RHS, Start) - Start) / Stride 13265// See what would happen if we assume the backedge is taken. This is 13266// used to compute MaxBECount. 13267 BECountIfBackedgeTaken =
13271// At this point, we know: 13273// 1. If IsSigned, Start <=s End; otherwise, Start <=u End 13274// 2. The index variable doesn't overflow. 13276// Therefore, we know N exists such that 13277// (Start + Stride * N) >= End, and computing "(Start + Stride * N)" 13278// doesn't overflow. 13280// Using this information, try to prove whether the addition in 13281// "(Start - End) + (Stride - 1)" has unsigned overflow. 13283bool MayAddOverflow = [&] {
13285// Suppose Stride is a power of two, and Start/End are unsigned 13286// integers. Let UMAX be the largest representable unsigned 13289// By the preconditions of this function, we know 13290// "(Start + Stride * N) >= End", and this doesn't overflow. 13293// End <= (Start + Stride * N) <= UMAX 13295// Subtracting Start from all the terms: 13297// End - Start <= Stride * N <= UMAX - Start 13299// Since Start is unsigned, UMAX - Start <= UMAX. Therefore: 13301// End - Start <= Stride * N <= UMAX 13303// Stride * N is a multiple of Stride. Therefore, 13305// End - Start <= Stride * N <= UMAX - (UMAX mod Stride) 13307// Since Stride is a power of two, UMAX + 1 is divisible by 13308// Stride. Therefore, UMAX mod Stride == Stride - 1. So we can 13311// End - Start <= Stride * N <= UMAX - Stride - 1 13313// Dropping the middle term: 13315// End - Start <= UMAX - Stride - 1 13317// Adding Stride - 1 to both sides: 13319// (End - Start) + (Stride - 1) <= UMAX 13321// In other words, the addition doesn't have unsigned overflow. 13323// A similar proof works if we treat Start/End as signed values. 13324// Just rewrite steps before "End - Start <= Stride * N <= UMAX" 13325// to use signed max instead of unsigned max. Note that we're 13326// trying to prove a lack of unsigned overflow in either case. 13329if (Start == Stride || Start ==
getMinusSCEV(Stride, One)) {
13330// If Start is equal to Stride, (End - Start) + (Stride - 1) == End 13331// - 1. If !IsSigned, 0 <u Stride == Start <=u End; so 0 <u End - 1 13332// <u End. If IsSigned, 0 <s Stride == Start <=s End; so 0 <s End - 13335// If Start is equal to Stride - 1, (End - Start) + Stride - 1 == 13343if (!MayAddOverflow) {
13344// floor((D + (S - 1)) / S) 13345// We prefer this formulation if it's legal because it's fewer 13355constSCEV *ConstantMaxBECount;
13356bool MaxOrZero =
false;
13357if (isa<SCEVConstant>(BECount)) {
13358 ConstantMaxBECount = BECount;
13359 }
elseif (BECountIfBackedgeTaken &&
13360 isa<SCEVConstant>(BECountIfBackedgeTaken)) {
13361// If we know exactly how many times the backedge will be taken if it's 13362// taken at least once, then the backedge count will either be that or 13364 ConstantMaxBECount = BECountIfBackedgeTaken;
13367 ConstantMaxBECount = computeMaxBECountForLT(
13371if (isa<SCEVCouldNotCompute>(ConstantMaxBECount) &&
13372 !isa<SCEVCouldNotCompute>(BECount))
13375constSCEV *SymbolicMaxBECount =
13376 isa<SCEVCouldNotCompute>(BECount) ? ConstantMaxBECount : BECount;
13377return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount, MaxOrZero,
13382constSCEV *LHS,
constSCEV *RHS,
constLoop *L,
bool IsSigned,
13383bool ControlsOnlyExit,
bool AllowPredicates) {
13385// We handle only IV > Invariant 13390if (!
IV && AllowPredicates)
13391// Try to make this an AddRec using runtime tests, in the first X 13392// iterations of this loop, where X is the SCEV expression found by the 13396// Avoid weird loops 13397if (!
IV ||
IV->getLoop() != L || !
IV->isAffine())
13401bool NoWrap = ControlsOnlyExit &&
IV->getNoWrapFlags(WrapType);
13406// Avoid negative or zero stride values 13410// Avoid proven overflow cases: this will ensure that the backedge taken count 13411// will not generate any unsigned overflow. Relaxed no-overflow conditions 13412// exploit NoWrapFlags, allowing to optimize in presence of undefined 13413// behaviors like the case of C language. 13414if (!Stride->
isOne() && !NoWrap)
13415if (canIVOverflowOnGT(RHS, Stride, IsSigned))
13418constSCEV *Start =
IV->getStart();
13421// If we know that Start >= RHS in the context of loop, then we know that 13422// min(RHS, Start) = RHS at this point. 13430if (Start->getType()->isPointerTy()) {
13432if (isa<SCEVCouldNotCompute>(Start))
13435if (
End->getType()->isPointerTy()) {
13437if (isa<SCEVCouldNotCompute>(
End))
13441// Compute ((Start - End) + (Stride - 1)) / Stride. 13442// FIXME: This can overflow. Holding off on fixing this for now; 13443// howManyGreaterThans will hopefully be gone soon. 13458// Although End can be a MIN expression we estimate MinEnd considering only 13459// the case End = RHS. This is safe because in the other case (Start - End) 13460// is zero, leading to a zero maximum backedge taken count. 13465constSCEV *ConstantMaxBECount =
13466 isa<SCEVConstant>(BECount)
13471if (isa<SCEVCouldNotCompute>(ConstantMaxBECount))
13472 ConstantMaxBECount = BECount;
13473constSCEV *SymbolicMaxBECount =
13474 isa<SCEVCouldNotCompute>(BECount) ? ConstantMaxBECount : BECount;
13476return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount,
false,
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
                                                    ScalarEvolution &SE) const {
  if (Range.isFullSet()) // Infinite loop.
    return SE.getCouldNotCompute();

  // If the start is a non-zero constant, shift the range to simplify things.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
    if (!SC->getValue()->isZero()) {
      SmallVector<const SCEV *, 4> Operands(operands());
      Operands[0] = SE.getZero(SC->getType());
      const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
                                             getNoWrapFlags(FlagNW));
      if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
        return ShiftedAddRec->getNumIterationsInRange(
            Range.subtract(SC->getAPInt()), SE);
      // This is strange and shouldn't happen.
      return SE.getCouldNotCompute();
    }

  // The only time we can solve this is when we have all constant indices.
  // Otherwise, we cannot determine the overflow conditions.
  if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
    return SE.getCouldNotCompute();

  // Okay at this point we know that all elements of the chrec are constants and
  // that the start element is zero.

  // First check to see if the range contains zero. If not, the first
  // iteration exits.
  unsigned BitWidth = SE.getTypeSizeInBits(getType());
  if (!Range.contains(APInt(BitWidth, 0)))
    return SE.getZero(getType());

  if (isAffine()) {
    // If this is an affine expression then we have this situation:
    //   Solve {0,+,A} in Range  ===  Ax in Range

    // We know that zero is in the range. If A is positive then we know that
    // the upper value of the range must be the first possible exit value.
    // If A is negative then the lower of the range is the last possible loop
    // value. Also note that we already checked for a full range.
    APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
    APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower();

    // The exit value should be (End+A)/A.
    APInt ExitVal = (End + A).udiv(A);
    ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);

    // Evaluate at the exit value. If we really did fall out of the valid
    // range, then we computed our trip count, otherwise wrap around or other
    // things must have happened.
    ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
    if (Range.contains(Val->getValue()))
      return SE.getCouldNotCompute(); // Something strange happened.

    // Ensure that the previous value is in the range.
    assert(Range.contains(
               EvaluateConstantChrecAtConstant(
                   this, ConstantInt::get(SE.getContext(), ExitVal - 1), SE)
                   ->getValue()) &&
           "Linear scev computation is off in a bad way!");
    return SE.getConstant(ExitVal);
  }

  if (isQuadratic()) {
    if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
      return SE.getConstant(*S);
  }

  return SE.getCouldNotCompute();
}
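// Worked example with hypothetical constants: solving {0,+,A} in Range for
// A = 3 and Range = [0, 10). A is positive, so End = Range.getUpper() - 1 == 9
// and the exit value is (End + A) /u A == 12 /u 3 == 4: iterations 0..3
// produce 0, 3, 6, 9 (all in range), while iteration 4 produces 12, the
// first value outside the range.
LLVM_ATTRIBUTE_UNUSED static void demoAffineIterationsInRange() {
  const uint64_t A = 3, Upper = 10;
  const uint64_t End = Upper - 1;
  const uint64_t ExitVal = (End + A) / A;
  assert(ExitVal == 4);
  assert(A * ExitVal >= Upper && A * (ExitVal - 1) < Upper);
}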
const SCEVAddRecExpr *
SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
  assert(getNumOperands() > 1 && "AddRec with zero step?");
  // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)),
  // but in this case we cannot guarantee that the value returned will be an
  // AddRec because SCEV does not have a fixed point where it stops
  // simplification: it is legal to return ({rec1} + {rec2}). For example, it
  // may happen if we reach arithmetic depth limit while simplifying. So we
  // construct the returned value explicitly.
  SmallVector<const SCEV *, 3> Ops;
  // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and
  // (this + Step) is {A+B,+,B+C,+...,+,N}.
  for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i)
    Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1)));
  // We know that the last operand is not a constant zero (otherwise it would
  // have been popped out earlier). This guarantees us that if the result has
  // the same last operand, then it will also not be popped out, meaning that
  // the returned value will be an AddRec.
  const SCEV *Last = getOperand(getNumOperands() - 1);
  assert(!Last->isZero() && "Recurrency with zero step?");
  Ops.push_back(Last);
  return cast<SCEVAddRecExpr>(
      SE.getAddRecExpr(Ops, getLoop(), SCEV::FlagAnyWrap));
}
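// Illustrative sketch (not from the LLVM sources): for the affine recurrence
// {A,+,B}, the post-increment form built above is {A+B,+,B}; evaluating both
// at iteration n shows the post-increment value is always one step ahead.
LLVM_ATTRIBUTE_UNUSED static void demoPostIncAddRec() {
  const long long A = 5, B = 2;
  for (long long n = 0; n < 4; ++n) {
    long long PreInc = A + n * B;        // {A,+,B} evaluated at n
    long long PostInc = (A + B) + n * B; // {A+B,+,B} evaluated at n
    assert(PostInc == PreInc + B);
  }
}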
// Return true when S contains at least an undef value.
bool ScalarEvolution::containsUndefs(const SCEV *S) const {
  return SCEVExprContains(S, [](const SCEV *S) {
    if (const auto *SU = dyn_cast<SCEVUnknown>(S))
      return isa<UndefValue>(SU->getValue());
    return false;
  });
}

// Return true when S contains a value that is a nullptr.
bool ScalarEvolution::containsErasedValue(const SCEV *S) const {
  return SCEVExprContains(S, [](const SCEV *S) {
    if (const auto *SU = dyn_cast<SCEVUnknown>(S))
      return SU->getValue() == nullptr;
    return false;
  });
}

/// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
  Type *Ty;
  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
    Ty = Store->getValueOperand()->getType();
  else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
    Ty = Load->getType();
  else
    return nullptr;

  Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
  return getSizeOfExpr(ETy, Ty);
}
//===----------------------------------------------------------------------===//
//                   SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//

void ScalarEvolution::SCEVCallbackVH::deleted() {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->eraseValueFromMap(getValPtr());
  // this now dangles!
}

void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");

  // Forget all the expressions associated with users of the old value,
  // so that future queries will recompute the expressions using the new
  // value.
  SE->forgetValue(getValPtr());
  // this now dangles!
}

//===----------------------------------------------------------------------===//
//                   ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//

ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
                                 AssumptionCache &AC, DominatorTree &DT,
                                 LoopInfo &LI)
    : F(F), DL(F.getDataLayout()), TLI(TLI), AC(AC), DT(DT), LI(LI),
      LoopDispositions(64), BlockDispositions(64) {
  // To use guards for proving predicates, we need to scan every instruction in
  // relevant basic blocks, and not just terminators. Doing this is a waste of
  // time if the IR does not actually contain any calls to
  // @llvm.experimental.guard, so do a quick check and remember this beforehand.
  //
  // This pessimizes the case where a pass that preserves ScalarEvolution wants
  // to _add_ guards to the module when there weren't any before, and wants
  // ScalarEvolution to optimize based on those guards. For now we prefer to be
  // efficient in lieu of being smart in that rather obscure case.
  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      F.getParent(), Intrinsic::experimental_guard);
  HasGuards = GuardDecl && !GuardDecl->use_empty();
}
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
    : F(Arg.F), DL(Arg.DL), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC),
      DT(Arg.DT), LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
      ValueExprMap(std::move(Arg.ValueExprMap)),
      PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
      PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
      PendingMerges(std::move(Arg.PendingMerges)),
      ConstantMultipleCache(std::move(Arg.ConstantMultipleCache)),
      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
      PredicatedBackedgeTakenCounts(
          std::move(Arg.PredicatedBackedgeTakenCounts)),
      BECountUsers(std::move(Arg.BECountUsers)),
      ConstantEvolutionLoopExitValue(
          std::move(Arg.ConstantEvolutionLoopExitValue)),
      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
      ValuesAtScopesUsers(std::move(Arg.ValuesAtScopesUsers)),
      LoopDispositions(std::move(Arg.LoopDispositions)),
      LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
      BlockDispositions(std::move(Arg.BlockDispositions)),
      SCEVUsers(std::move(Arg.SCEVUsers)),
      UnsignedRanges(std::move(Arg.UnsignedRanges)),
      SignedRanges(std::move(Arg.SignedRanges)),
      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
      UniquePreds(std::move(Arg.UniquePreds)),
      SCEVAllocator(std::move(Arg.SCEVAllocator)),
      LoopUsers(std::move(Arg.LoopUsers)),
      PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
      FirstUnknown(Arg.FirstUnknown) {
  Arg.FirstUnknown = nullptr;
}
ScalarEvolution::~ScalarEvolution() {
  // Iterate through all the SCEVUnknown instances and call their
  // destructors, so that they release their references to their values.
  for (SCEVUnknown *U = FirstUnknown; U;) {
    SCEVUnknown *Tmp = U;
    U = U->Next;
    Tmp->~SCEVUnknown();
  }
  FirstUnknown = nullptr;

  ExprValueMap.clear();
  ValueExprMap.clear();
  HasRecMap.clear();
  BackedgeTakenCounts.clear();
  PredicatedBackedgeTakenCounts.clear();

  assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
  assert(PendingPhiRanges.empty() && "getRangeRef garbage");
  assert(PendingMerges.empty() && "isImpliedViaMerge garbage");
  assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
/// When printing a top-level SCEV for trip counts, it's helpful to include
/// a type for constants which are otherwise hard to disambiguate.
static void PrintSCEVWithTypeHint(raw_ostream &OS, const SCEV *S) {
  if (isa<SCEVConstant>(S))
    OS << *S->getType() << " ";
  OS << *S;
}

static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
                          const Loop *L) {
  // Print all inner loops first
  for (Loop *I : *L)
    PrintLoopInfo(OS, SE, I);

  OS << "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";

  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (ExitingBlocks.size() != 1)
    OS << "<multiple exits> ";

  auto *BTC = SE->getBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(BTC)) {
    OS << "backedge-taken count is ";
    PrintSCEVWithTypeHint(OS, BTC);
  } else
    OS << "Unpredictable backedge-taken count.";
  OS << "\n";

  if (ExitingBlocks.size() > 1)
    for (BasicBlock *ExitingBlock : ExitingBlocks) {
      OS << "  exit count for " << ExitingBlock->getName() << ": ";
      const SCEV *EC = SE->getExitCount(L, ExitingBlock);
      PrintSCEVWithTypeHint(OS, EC);
      if (isa<SCEVCouldNotCompute>(EC)) {
        // Retry with predicates.
        SmallVector<const SCEVPredicate *> Predicates;
        EC = SE->getPredicatedExitCount(L, ExitingBlock, &Predicates);
        if (!isa<SCEVCouldNotCompute>(EC)) {
          OS << "\n  predicated exit count for " << ExitingBlock->getName()
             << ": ";
          PrintSCEVWithTypeHint(OS, EC);
          OS << "\n   Predicates:\n";
          for (const auto *P : Predicates)
            P->print(OS, 4);
        }
      }
      OS << "\n";
    }

  OS << "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";

  auto *ConstantBTC = SE->getConstantMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(ConstantBTC)) {
    OS << "constant max backedge-taken count is ";
    PrintSCEVWithTypeHint(OS, ConstantBTC);
    if (SE->isBackedgeTakenCountMaxOrZero(L))
      OS << ", actual taken count either this or zero.";
  } else {
    OS << "Unpredictable constant max backedge-taken count. ";
  }
  OS << "\n"
        "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";

  auto *SymbolicBTC = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(SymbolicBTC)) {
    OS << "symbolic max backedge-taken count is ";
    PrintSCEVWithTypeHint(OS, SymbolicBTC);
    if (SE->isBackedgeTakenCountMaxOrZero(L))
      OS << ", actual taken count either this or zero.";
  } else {
    OS << "Unpredictable symbolic max backedge-taken count. ";
  }
  OS << "\n";

  if (ExitingBlocks.size() > 1)
    for (BasicBlock *ExitingBlock : ExitingBlocks) {
      OS << "  symbolic max exit count for " << ExitingBlock->getName()
         << ": ";
      auto *ExitBTC = SE->getExitCount(L, ExitingBlock,
                                       ScalarEvolution::SymbolicMaximum);
      PrintSCEVWithTypeHint(OS, ExitBTC);
      if (isa<SCEVCouldNotCompute>(ExitBTC)) {
        // Retry with predicates.
        SmallVector<const SCEVPredicate *> Predicates;
        ExitBTC = SE->getPredicatedExitCount(L, ExitingBlock, &Predicates,
                                             ScalarEvolution::SymbolicMaximum);
        if (!isa<SCEVCouldNotCompute>(ExitBTC)) {
          OS << "\n  predicated symbolic max exit count for "
             << ExitingBlock->getName() << ": ";
          PrintSCEVWithTypeHint(OS, ExitBTC);
          OS << "\n   Predicates:\n";
          for (const auto *P : Predicates)
            P->print(OS, 4);
        }
      }
      OS << "\n";
    }

  SmallVector<const SCEVPredicate *, 4> Preds;
  auto *PBT = SE->getPredicatedBackedgeTakenCount(L, Preds);
  if (PBT != BTC) {
    assert(!Preds.empty() && "Different predicated BTC, but no predicates");
    OS << "Loop ";
    L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ": ";
    if (!isa<SCEVCouldNotCompute>(PBT)) {
      OS << "Predicated backedge-taken count is ";
      PrintSCEVWithTypeHint(OS, PBT);
    } else
      OS << "Unpredictable predicated backedge-taken count.";
    OS << "\n";
    OS << " Predicates:\n";
    for (const auto *P : Preds)
      P->print(OS, 4);
  }
  Preds.clear();

  auto *PredConstantMax =
      SE->getPredicatedConstantMaxBackedgeTakenCount(L, Preds);
  if (PredConstantMax != ConstantBTC) {
    assert(!Preds.empty() &&
           "different predicated constant max BTC but no predicates");
    OS << "Loop ";
    L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ": ";
    if (!isa<SCEVCouldNotCompute>(PredConstantMax)) {
      OS << "Predicated constant max backedge-taken count is ";
      PrintSCEVWithTypeHint(OS, PredConstantMax);
    } else
      OS << "Unpredictable predicated constant max backedge-taken count.";
    OS << "\n";
    OS << " Predicates:\n";
    for (const auto *P : Preds)
      P->print(OS, 4);
  }
  Preds.clear();

  auto *PredSymbolicMax =
      SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds);
  if (SymbolicBTC != PredSymbolicMax) {
    assert(!Preds.empty() &&
           "Different predicated symbolic max BTC, but no predicates");
    OS << "Loop ";
    L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ": ";
    if (!isa<SCEVCouldNotCompute>(PredSymbolicMax)) {
      OS << "Predicated symbolic max backedge-taken count is ";
      PrintSCEVWithTypeHint(OS, PredSymbolicMax);
    } else
      OS << "Unpredictable predicated symbolic max backedge-taken count.";
    OS << "\n";
    OS << " Predicates:\n";
    for (const auto *P : Preds)
      P->print(OS, 4);
  }

  OS << "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";
  OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
}

namespace llvm {
raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) {
  switch (LD) {
  case ScalarEvolution::LoopVariant:
    OS << "Variant";
    break;
  case ScalarEvolution::LoopInvariant:
    OS << "Invariant";
    break;
  case ScalarEvolution::LoopComputable:
    OS << "Computable";
    break;
  }
  return OS;
}

raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::BlockDisposition BD) {
  switch (BD) {
  case ScalarEvolution::DoesNotDominate:
    OS << "DoesNotDominate";
    break;
  case ScalarEvolution::Dominates:
    OS << "Dominates";
    break;
  case ScalarEvolution::ProperlyDominates:
    OS << "ProperlyDominates";
    break;
  }
  return OS;
}
} // end namespace llvm
void ScalarEvolution::print(raw_ostream &OS) const {
  // ScalarEvolution's implementation of the print method is to print
  // out SCEV values of all instructions that are interesting. Doing
  // this potentially causes it to create new SCEV objects though,
  // which technically conflicts with the const qualifier. This isn't
  // observable from outside the class though, so casting away the
  // const isn't dangerous.
  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);

  if (ClassifyExpressions) {
    OS << "Classifying expressions for: ";
    F.printAsOperand(OS, /*PrintType=*/false);
    OS << "\n";
    for (Instruction &I : instructions(F))
      if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
        OS << I << '\n';
        OS << "  -->  ";
        const SCEV *SV = SE.getSCEV(&I);
        SV->print(OS);
        if (!isa<SCEVCouldNotCompute>(SV)) {
          OS << " U: ";
          SE.getUnsignedRange(SV).print(OS);
          OS << " S: ";
          SE.getSignedRange(SV).print(OS);
        }

        const Loop *L = LI.getLoopFor(I.getParent());

        const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
        if (AtUse != SV) {
          OS << "  -->  ";
          AtUse->print(OS);
          if (!isa<SCEVCouldNotCompute>(AtUse)) {
            OS << " U: ";
            SE.getUnsignedRange(AtUse).print(OS);
            OS << " S: ";
            SE.getSignedRange(AtUse).print(OS);
          }
        }

        if (L) {
          OS << "\t\t" "Exits: ";
          const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
          if (!SE.isLoopInvariant(ExitValue, L)) {
            OS << "<<Unknown>>";
          } else {
            OS << *ExitValue;
          }

          bool First = true;
          for (const auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
            if (First) {
              OS << "\t\t" "LoopDispositions: { ";
              First = false;
            } else {
              OS << ", ";
            }

            Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
            OS << ": " << SE.getLoopDisposition(SV, Iter);
          }

          for (const auto *InnerL : depth_first(L)) {
            if (InnerL == L)
              continue;
            if (First) {
              OS << "\t\t" "LoopDispositions: { ";
              First = false;
            } else {
              OS << ", ";
            }

            InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
            OS << ": " << SE.getLoopDisposition(SV, InnerL);
          }

          OS << " }";
        }

        OS << "\n";
      }
  }

  OS << "Determining loop execution counts for: ";
  F.printAsOperand(OS, /*PrintType=*/false);
  OS << "\n";
  for (Loop *I : LI)
    PrintLoopInfo(OS, &SE, I);
}
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
  auto &Values = LoopDispositions[S];
  for (auto &V : Values) {
    if (V.getPointer() == L)
      return V.getInt();
  }
  Values.emplace_back(L, LoopVariant);
  LoopDisposition D = computeLoopDisposition(S, L);
  auto &Values2 = LoopDispositions[S];
  for (auto &V : llvm::reverse(Values2)) {
    if (V.getPointer() == L) {
      V.setInt(D);
      break;
    }
  }
  return D;
}

ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return LoopInvariant;
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);

    // If L is the addrec's loop, it's computable.
    if (AR->getLoop() == L)
      return LoopComputable;

    // Add recurrences are never invariant in the function-body (null loop).
    if (!L)
      return LoopVariant;

    // Everything that is not defined at loop entry is variant.
    if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))
      return LoopVariant;
    assert(!L->contains(AR->getLoop()) && "Containing loop's header does not"
           " dominate the contained loop's header?");

    // This recurrence is invariant w.r.t. L if AR's loop contains L.
    if (AR->getLoop()->contains(L))
      return LoopInvariant;

    // This recurrence is variant w.r.t. L if any of its operands
    // are variant.
    for (const auto *Op : AR->operands())
      if (!isLoopInvariant(Op, L))
        return LoopVariant;

    // Otherwise it's loop-invariant.
    return LoopInvariant;
  }
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    bool HasVarying = false;
    for (const auto *Op : S->operands()) {
      LoopDisposition D = getLoopDisposition(Op, L);
      if (D == LoopVariant)
        return LoopVariant;
      if (D == LoopComputable)
        HasVarying = true;
    }
    return HasVarying ? LoopComputable : LoopInvariant;
  }
  case scUnknown:
    // All non-instruction values are loop invariant. All instructions are loop
    // invariant if they are not contained in the specified loop.
    // Instructions are never considered invariant in the function body
    // (null loop) because they are defined within the "loop".
    if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
      return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
    return LoopInvariant;
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  auto &Values = BlockDispositions[S];
  for (auto &V : Values) {
    if (V.getPointer() == BB)
      return V.getInt();
  }
  Values.emplace_back(BB, DoesNotDominate);
  BlockDisposition D = computeBlockDisposition(S, BB);
  auto &Values2 = BlockDispositions[S];
  for (auto &V : llvm::reverse(Values2)) {
    if (V.getPointer() == BB) {
      V.setInt(D);
      break;
    }
  }
  return D;
}

ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return ProperlyDominates;
  case scAddRecExpr: {
    // This uses a "dominates" query instead of "properly dominates" query
    // to test for proper dominance too, because the instruction which
    // produces the addrec's value is a PHI, and a PHI effectively properly
    // dominates its entire containing block.
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
      return DoesNotDominate;

    // Fall through into SCEVNAryExpr handling.
    [[fallthrough]];
  }
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    bool Proper = true;
    for (const SCEV *NAryOp : S->operands()) {
      BlockDisposition D = getBlockDisposition(NAryOp, BB);
      if (D == DoesNotDominate)
        return DoesNotDominate;
      if (D == Dominates)
        Proper = false;
    }
    return Proper ? ProperlyDominates : Dominates;
  }
  case scUnknown:
    if (Instruction *I =
            dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
      if (I->getParent() == BB)
        return Dominates;
      if (DT.properlyDominates(I->getParent(), BB))
        return ProperlyDominates;
      return DoesNotDominate;
    }
    return ProperlyDominates;
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
  return getBlockDisposition(S, BB) >= Dominates;
}

bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
  return getBlockDisposition(S, BB) == ProperlyDominates;
}
void ScalarEvolution::forgetBackedgeTakenCounts(const Loop *L,
                                                bool Predicated) {
  auto &BECounts =
      Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
  auto It = BECounts.find(L);
  if (It != BECounts.end()) {
    for (const ExitNotTakenInfo &ENT : It->second.ExitNotTaken) {
      for (const SCEV *S : {ENT.ExactNotTaken, ENT.SymbolicMaxNotTaken}) {
        if (!isa<SCEVConstant>(S)) {
          auto UserIt = BECountUsers.find(S);
          assert(UserIt != BECountUsers.end());
          UserIt->second.erase({L, Predicated});
        }
      }
    }
    BECounts.erase(It);
  }
}
void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
  SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end());
  SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end());

  while (!Worklist.empty()) {
    const SCEV *Curr = Worklist.pop_back_val();
    auto Users = SCEVUsers.find(Curr);
    if (Users != SCEVUsers.end())
      for (const auto *User : Users->second)
        if (ToForget.insert(User).second)
          Worklist.push_back(User);
  }

  for (const auto *S : ToForget)
    forgetMemoizedResultsImpl(S);

  for (auto I = PredicatedSCEVRewrites.begin();
       I != PredicatedSCEVRewrites.end();) {
    std::pair<const SCEV *, const Loop *> Entry = I->first;
    if (ToForget.count(Entry.first))
      PredicatedSCEVRewrites.erase(I++);
    else
      ++I;
  }
}
void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
  LoopDispositions.erase(S);
  BlockDispositions.erase(S);
  UnsignedRanges.erase(S);
  SignedRanges.erase(S);
  HasRecMap.erase(S);
  ConstantMultipleCache.erase(S);

  if (auto *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    UnsignedWrapViaInductionTried.erase(AR);
    SignedWrapViaInductionTried.erase(AR);
  }

  auto ExprIt = ExprValueMap.find(S);
  if (ExprIt != ExprValueMap.end()) {
    for (Value *V : ExprIt->second) {
      auto ValueIt = ValueExprMap.find_as(V);
      if (ValueIt != ValueExprMap.end())
        ValueExprMap.erase(ValueIt);
    }
    ExprValueMap.erase(ExprIt);
  }

  auto ScopeIt = ValuesAtScopes.find(S);
  if (ScopeIt != ValuesAtScopes.end()) {
    for (const auto &Pair : ScopeIt->second)
      if (!isa_and_nonnull<SCEVConstant>(Pair.second))
        llvm::erase(ValuesAtScopesUsers[Pair.second],
                    std::make_pair(Pair.first, S));
    ValuesAtScopes.erase(ScopeIt);
  }

  auto ScopeUserIt = ValuesAtScopesUsers.find(S);
  if (ScopeUserIt != ValuesAtScopesUsers.end()) {
    for (const auto &Pair : ScopeUserIt->second)
      llvm::erase(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S));
    ValuesAtScopesUsers.erase(ScopeUserIt);
  }

  auto BEUsersIt = BECountUsers.find(S);
  if (BEUsersIt != BECountUsers.end()) {
    // Work on a copy, as forgetBackedgeTakenCounts() will modify the original.
    auto Copy = BEUsersIt->second;
    for (const auto &Pair : Copy)
      forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt());
    BECountUsers.erase(BEUsersIt);
  }

  auto FoldUser = FoldCacheUser.find(S);
  if (FoldUser != FoldCacheUser.end())
    for (auto &KV : FoldUser->second)
      FoldCache.erase(KV);
  FoldCacheUser.erase(S);
}
void ScalarEvolution::getUsedLoops(const SCEV *S,
                                   SmallPtrSetImpl<const Loop *> &LoopsUsed) {
  struct FindUsedLoops {
    FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed)
        : LoopsUsed(LoopsUsed) {}
    SmallPtrSetImpl<const Loop *> &LoopsUsed;
    bool follow(const SCEV *S) {
      if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
        LoopsUsed.insert(AR->getLoop());
      return true;
    }

    bool isDone() const { return false; }
  };

  FindUsedLoops F(LoopsUsed);
  SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
void ScalarEvolution::getReachableBlocks(
    SmallPtrSetImpl<BasicBlock *> &Reachable, Function &F) {
  SmallVector<BasicBlock *> Worklist;
  Worklist.push_back(&F.getEntryBlock());
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    if (!Reachable.insert(BB).second)
      continue;

    Value *Cond;
    BasicBlock *TrueBB, *FalseBB;
    if (match(BB->getTerminator(), m_Br(m_Value(Cond), m_BasicBlock(TrueBB),
                                        m_BasicBlock(FalseBB)))) {
      if (auto *C = dyn_cast<ConstantInt>(Cond)) {
        Worklist.push_back(C->isOne() ? TrueBB : FalseBB);
        continue;
      }

      if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
        const SCEV *L = getSCEV(Cmp->getOperand(0));
        const SCEV *R = getSCEV(Cmp->getOperand(1));
        if (isKnownPredicateViaConstantRanges(Cmp->getCmpPredicate(), L, R)) {
          Worklist.push_back(TrueBB);
          continue;
        }
        if (isKnownPredicateViaConstantRanges(Cmp->getInverseCmpPredicate(), L,
                                              R)) {
          Worklist.push_back(FalseBB);
          continue;
        }
      }
    }

    append_range(Worklist, successors(BB));
  }
}
14377 SE2.getReachableBlocks(ReachableBlocks,
F);
14379auto GetDelta = [&](
constSCEV *Old,
constSCEV *New) ->
constSCEV * {
14381// SCEV treats "undef" as an unknown but consistent value (i.e. it does 14382// not propagate undef aggressively). This means we can (and do) fail 14383// verification in cases where a transform makes a value go from "undef" 14384// to "undef+1" (say). The transform is fine, since in both cases the 14385// result is "undef", but SCEV thinks the value increased by 1. 14389// Unless VerifySCEVStrict is set, we only compare constant deltas. 14397while (!LoopStack.
empty()) {
14401// Only verify BECounts in reachable loops. For an unreachable loop, 14402// any BECount is legal. 14403if (!ReachableBlocks.
contains(L->getHeader()))
14406// Only verify cached BECounts. Computing new BECounts may change the 14407// results of subsequent SCEV uses. 14408auto It = BackedgeTakenCounts.find(L);
14409if (It == BackedgeTakenCounts.end())
14413 SCM.visit(It->second.getExact(L,
const_cast<ScalarEvolution *
>(
this)));
14418// NB! This situation is legal, but is very suspicious -- whatever pass 14419// change the loop to make a trip count go from could not compute to 14420// computable or vice-versa *should have* invalidated SCEV. However, we 14421// choose not to assert here (for now) since we don't want false 14433constSCEV *Delta = GetDelta(CurBECount, NewBECount);
14434if (Delta && !Delta->
isZero()) {
14435dbgs() <<
"Trip Count for " << *L <<
" Changed!\n";
14436dbgs() <<
"Old: " << *CurBECount <<
"\n";
14437dbgs() <<
"New: " << *NewBECount <<
"\n";
14438dbgs() <<
"Delta: " << *Delta <<
"\n";
14443// Collect all valid loops currently in LoopInfo. 14446while (!Worklist.
empty()) {
14448if (ValidLoops.
insert(L).second)
14449 Worklist.
append(L->begin(), L->end());
14451for (
constauto &KV : ValueExprMap) {
14453// Check for SCEV expressions referencing invalid/deleted loops. 14454if (
auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
14456"AddRec references invalid loop");
14460// Check that the value is also part of the reverse map. 14461auto It = ExprValueMap.
find(KV.second);
14462if (It == ExprValueMap.
end() || !It->second.contains(KV.first)) {
14463dbgs() <<
"Value " << *KV.first
14464 <<
" is in ValueExprMap but not in ExprValueMap\n";
14468if (
auto *
I = dyn_cast<Instruction>(&*KV.first)) {
14469if (!ReachableBlocks.
contains(
I->getParent()))
14471constSCEV *OldSCEV = SCM.visit(KV.second);
14473constSCEV *Delta = GetDelta(OldSCEV, NewSCEV);
14474if (Delta && !Delta->
isZero()) {
14475dbgs() <<
"SCEV for value " << *
I <<
" changed!\n" 14476 <<
"Old: " << *OldSCEV <<
"\n" 14477 <<
"New: " << *NewSCEV <<
"\n" 14478 <<
"Delta: " << *Delta <<
"\n";
14484for (
constauto &KV : ExprValueMap) {
14485for (
Value *V : KV.second) {
14486auto It = ValueExprMap.find_as(V);
14487if (It == ValueExprMap.end()) {
14488dbgs() <<
"Value " << *V
14489 <<
" is in ExprValueMap but not in ValueExprMap\n";
14492if (It->second != KV.first) {
14493dbgs() <<
"Value " << *V <<
" mapped to " << *It->second
14494 <<
" rather than " << *KV.first <<
"\n";
  // Verify integrity of SCEV users.
  for (const auto &S : UniqueSCEVs) {
    for (const auto *Op : S.operands()) {
      // We do not store dependencies of constants.
      if (isa<SCEVConstant>(Op))
        continue;
      auto It = SCEVUsers.find(Op);
      if (It != SCEVUsers.end() && It->second.count(&S))
        continue;
      dbgs() << "Use of operand " << *Op << " by user " << S
             << " is not being tracked!\n";
      std::abort();
    }
  }

  // Verify integrity of ValuesAtScopes users.
  for (const auto &ValueAndVec : ValuesAtScopes) {
    const SCEV *Value = ValueAndVec.first;
    for (const auto &LoopAndValueAtScope : ValueAndVec.second) {
      const Loop *L = LoopAndValueAtScope.first;
      const SCEV *ValueAtScope = LoopAndValueAtScope.second;
      if (!isa<SCEVConstant>(ValueAtScope)) {
        auto It = ValuesAtScopesUsers.find(ValueAtScope);
        if (It != ValuesAtScopesUsers.end() &&
            is_contained(It->second, std::make_pair(L, Value)))
          continue;
        dbgs() << "Value: " << *Value << ", Loop: " << *L
               << ", ValueAtScope: " << *ValueAtScope
               << " missing in ValuesAtScopesUsers\n";
        std::abort();
      }
    }
  }

  for (const auto &ValueAtScopeAndVec : ValuesAtScopesUsers) {
    const SCEV *ValueAtScope = ValueAtScopeAndVec.first;
    for (const auto &LoopAndValue : ValueAtScopeAndVec.second) {
      const Loop *L = LoopAndValue.first;
      const SCEV *Value = LoopAndValue.second;
      assert(!isa<SCEVConstant>(Value));
      auto It = ValuesAtScopes.find(Value);
      if (It != ValuesAtScopes.end() &&
          is_contained(It->second, std::make_pair(L, ValueAtScope)))
        continue;
      dbgs() << "Value: " << *Value << ", Loop: " << *L
             << ", ValueAtScope: " << *ValueAtScope
             << " missing in ValuesAtScopes\n";
      std::abort();
    }
  }
  // Verify integrity of BECountUsers.
  auto VerifyBECountUsers = [&](bool Predicated) {
    auto &BECounts =
        Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
    for (const auto &LoopAndBEInfo : BECounts) {
      for (const ExitNotTakenInfo &ENT : LoopAndBEInfo.second.ExitNotTaken) {
        for (const SCEV *S : {ENT.ExactNotTaken, ENT.SymbolicMaxNotTaken}) {
          if (!isa<SCEVConstant>(S)) {
            auto UserIt = BECountUsers.find(S);
            if (UserIt != BECountUsers.end() &&
                UserIt->second.contains({LoopAndBEInfo.first, Predicated}))
              continue;
            dbgs() << "Value " << *S << " for loop " << *LoopAndBEInfo.first
                   << " missing from BECountUsers\n";
            std::abort();
          }
        }
      }
    }
  };
  VerifyBECountUsers(/* Predicated */ false);
  VerifyBECountUsers(/* Predicated */ true);
  // Verify integrity of the loop disposition cache.
  for (auto &[S, Values] : LoopDispositions) {
    for (auto [Loop, CachedDisposition] : Values) {
      const auto RecomputedDisposition = SE2.getLoopDisposition(S, Loop);
      if (CachedDisposition != RecomputedDisposition) {
        dbgs() << "Cached disposition of " << *S << " for loop " << *Loop
               << " is incorrect: cached " << CachedDisposition << ", actual "
               << RecomputedDisposition << "\n";
        std::abort();
      }
    }
  }

  // Verify integrity of the block disposition cache.
  for (auto &[S, Values] : BlockDispositions) {
    for (auto [BB, CachedDisposition] : Values) {
      const auto RecomputedDisposition = SE2.getBlockDisposition(S, BB);
      if (CachedDisposition != RecomputedDisposition) {
        dbgs() << "Cached disposition of " << *S << " for block %"
               << BB->getName() << " is incorrect: cached " << CachedDisposition
               << ", actual " << RecomputedDisposition << "\n";
        std::abort();
      }
    }
  }
  // Verify FoldCache/FoldCacheUser caches.
  for (auto [FoldID, Expr] : FoldCache) {
    auto I = FoldCacheUser.find(Expr);
    if (I == FoldCacheUser.end()) {
      dbgs() << "Missing entry in FoldCacheUser for cached expression " << *Expr
             << "!\n";
      std::abort();
    }
    if (!is_contained(I->second, FoldID)) {
      dbgs() << "Missing FoldID in cached users of " << *Expr << "!\n";
      std::abort();
    }
  }
  for (auto [Expr, IDs] : FoldCacheUser) {
    for (auto &FoldID : IDs) {
      auto I = FoldCache.find(FoldID);
      if (I == FoldCache.end()) {
        dbgs() << "Missing entry in FoldCache for expression " << *Expr
               << "!\n";
        std::abort();
      }
      if (I->second != Expr) {
        dbgs() << "Entry in FoldCache doesn't match FoldCacheUser: "
               << *I->second << " != " << *Expr << "!\n";
        std::abort();
      }
    }
  }
  // Verify that ConstantMultipleCache computations are correct. We check that
  // cached multiples and recomputed multiples are multiples of each other to
  // verify correctness. It is possible that a recomputed multiple is different
  // from the cached multiple due to strengthened no wrap flags or changes in
  // KnownBits computations.
  for (auto [S, Multiple] : ConstantMultipleCache) {
    APInt RecomputedMultiple = SE2.getConstantMultiple(S);
    if ((Multiple != 0 && RecomputedMultiple != 0 &&
         Multiple.urem(RecomputedMultiple) != 0 &&
         RecomputedMultiple.urem(Multiple) != 0)) {
      dbgs() << "Incorrect cached computation in ConstantMultipleCache for "
             << *S << " : Computed " << RecomputedMultiple
             << " but cache contains " << Multiple << "!\n";
      std::abort();
    }
  }
}
bool ScalarEvolution::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Invalidate the ScalarEvolution object whenever it isn't preserved or one
  // of its dependencies is invalidated.
  auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA);
}

PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
  // For compatibility with opt's -analyze feature under legacy pass manager
  // which was not ported to NPM. This keeps tests using
  // update_analyze_test_checks.py working.
  OS << "Printing analysis 'Scalar Evolution Analysis' for function '"
     << F.getName() << "':\n";
  AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
  return PreservedAnalyses::all();
}

INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
                      "Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
                    "Scalar Evolution Analysis", false, true)

bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
  SE.reset(new ScalarEvolution(
      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
  return false;
}
14740"Type mismatch between LHS and RHS");
14741// Unique this node based on the arguments 14743ID.AddInteger(Pred);
14747if (
constauto *S = UniquePreds.FindNodeOrInsertPos(
ID, IP))
14751 UniquePreds.InsertNode(Eq, IP);
14759// Unique this node based on the arguments 14762ID.AddInteger(AddedFlags);
14764if (
constauto *S = UniquePreds.FindNodeOrInsertPos(
ID, IP))
14766auto *OF =
new (SCEVAllocator)
14768 UniquePreds.InsertNode(OF, IP);
namespace {

class SCEVPredicateRewriter
    : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
  /// Rewrites \p S in the context of a loop L and the SCEV predication
  /// infrastructure.
  ///
  /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
  /// equivalences present in \p Pred.
  ///
  /// If \p NewPreds is non-null, rewrite is free to add further predicates to
  /// \p NewPreds such that the result will be an AddRecExpr.
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             SmallVectorImpl<const SCEVPredicate *> *NewPreds,
                             const SCEVPredicate *Pred) {
    SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (Pred) {
      if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) {
        for (const auto *Pred : U->getPredicates())
          if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred))
            if (IPred->getLHS() == Expr &&
                IPred->getPredicate() == ICmpInst::ICMP_EQ)
              return IPred->getRHS();
      } else if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) {
        if (IPred->getLHS() == Expr &&
            IPred->getPredicate() == ICmpInst::ICMP_EQ)
          return IPred->getRHS();
      }
    }
    return convertToAddRecWithPreds(Expr);
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nuw
      // flag. Add the nusw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
        return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nsw
      // flag. Add the nssw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
        return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

private:
  explicit SCEVPredicateRewriter(
      const Loop *L, ScalarEvolution &SE,
      SmallVectorImpl<const SCEVPredicate *> *NewPreds,
      const SCEVPredicate *Pred)
      : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}

  bool addOverflowAssumption(const SCEVPredicate *P) {
    if (!NewPreds) {
      // Check if we've already made this assumption.
      return Pred && Pred->implies(P, SE);
    }
    NewPreds->push_back(P);
    return true;
  }

  bool addOverflowAssumption(const SCEVAddRecExpr *AR,
                             SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
    auto *A = SE.getWrapPredicate(AR, AddedFlags);
    return addOverflowAssumption(A);
  }

  // If \p Expr represents a PHINode, we try to see if it can be represented
  // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
  // to add this predicate as a runtime overflow check, we return the AddRec.
  // If \p Expr does not meet these conditions (is not a PHI node, or we
  // couldn't create an AddRec for it, or couldn't add the predicate), we just
  // return \p Expr.
  const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
    if (!isa<PHINode>(Expr->getValue()))
      return Expr;
    std::optional<
        std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
        PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
    if (!PredicatedRewrite)
      return Expr;
    for (const auto *P : PredicatedRewrite->second) {
      // Wrap predicates from outer loops are not supported.
      if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
        if (L != WP->getExpr()->getLoop())
          return Expr;
      }
      if (!addOverflowAssumption(P))
        return Expr;
    }
    return PredicatedRewrite->first;
  }

  SmallVectorImpl<const SCEVPredicate *> *NewPreds;
  const SCEVPredicate *Pred;
  const Loop *L;
};
// end anonymous namespace 14899return SCEVPredicateRewriter::rewrite(S, L, *
this,
nullptr, &Preds);
14906 S = SCEVPredicateRewriter::rewrite(S, L, *
this, &TransformPreds,
nullptr);
14907auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
14912// Since the transformation was successful, we can now transfer the SCEV 14922 : FastID(
    : FastID(ID), Kind(Kind) {}

SCEVComparePredicate::SCEVComparePredicate(const FoldingSetNodeIDRef ID,
                                           const ICmpInst::Predicate Pred,
                                           const SCEV *LHS, const SCEV *RHS)
    : SCEVPredicate(ID, P_Compare), Pred(Pred), LHS(LHS), RHS(RHS) {
  assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
  assert(LHS != RHS && "LHS and RHS are the same SCEV");
}

bool SCEVComparePredicate::implies(const SCEVPredicate *N,
                                   ScalarEvolution &SE) const {
  const auto *Op = dyn_cast<SCEVComparePredicate>(N);
  if (!Op)
    return false;

  if (Pred != ICmpInst::ICMP_EQ)
    return false;

  return Op->LHS == LHS && Op->RHS == RHS;
}

bool SCEVWrapPredicate::implies(const SCEVPredicate *N,
                                ScalarEvolution &SE) const {
  const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
  if (!Op || setFlags(Flags, Op->Flags) != Flags)
    return false;

  if (Op->AR == AR)
    return true;

  if (Flags != SCEVWrapPredicate::IncrementNSSW &&
      Flags != SCEVWrapPredicate::IncrementNUSW)
    return false;

  const SCEV *OpStart = Op->AR->getStart();
  const SCEV *Start = AR->getStart();
  const SCEV *OpStep = Op->AR->getStepRecurrence(SE);
  const SCEV *Step = AR->getStepRecurrence(SE);

  // If both steps are positive, this implies N, if N's start and step are
  // ULE/SLE (for NSUW/NSSW) than this'.
  if (!SE.isKnownPositive(Step) || !SE.isKnownPositive(OpStep))
    return false;

  bool IsNUW = Flags == SCEVWrapPredicate::IncrementNUSW;
  ICmpInst::Predicate Pred = IsNUW ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
  return SE.isKnownPredicate(Pred, OpStep, Step) &&
         SE.isKnownPredicate(Pred, OpStart, Start);
}

SCEVWrapPredicate::IncrementWrapFlags
SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
                                   ScalarEvolution &SE) {
  IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
  SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();

  // We can safely transfer the NSW flag as NSSW.
  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
    ImpliedFlags = IncrementNSSW;

  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
    // If the increment is positive, the SCEV NUW flag will also imply the
    // WrapPredicate NUSW flag.
    if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)))
      if (Step->getValue()->getValue().isNonNegative())
        ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW);
  }

  return ImpliedFlags;
}
/// Union predicates don't get cached so create a dummy set ID for it.
SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds,
                                       ScalarEvolution &SE)
    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {
  for (const auto *P : Preds)
    add(P, SE);
}

bool SCEVUnionPredicate::isAlwaysTrue() const {
  return all_of(Preds,
                [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
}

bool SCEVUnionPredicate::implies(const SCEVPredicate *N,
                                 ScalarEvolution &SE) const {
  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
    return all_of(Set->Preds, [this, &SE](const SCEVPredicate *I) {
      return this->implies(I, SE);
    });

  return any_of(Preds,
                [N, &SE](const SCEVPredicate *I) { return I->implies(N, SE); });
}

void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
  for (const auto *Pred : Preds)
    Pred->print(OS, Depth);
}

void SCEVUnionPredicate::add(const SCEVPredicate *N, ScalarEvolution &SE) {
  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
    for (const auto *Pred : Set->Preds)
      add(Pred, SE);
    return;
  }

  // Only add predicate if it is not already implied by this union predicate.
  if (implies(N, SE))
    return;

  // Build a new vector containing the current predicates, except the ones that
  // are implied by the new predicate N.
  SmallVector<const SCEVPredicate *> PrunedPreds;
  for (auto *P : Preds) {
    if (N->implies(P, SE))
      continue;
    PrunedPreds.push_back(P);
  }
  Preds = std::move(PrunedPreds);
  Preds.push_back(N);
}

PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
                                                     Loop &L)
    : SE(SE), L(L) {
  SmallVector<const SCEVPredicate *, 4> Empty;
  Preds = std::make_unique<SCEVUnionPredicate>(Empty, SE);
}

void ScalarEvolution::registerUser(const SCEV *User,
                                   ArrayRef<const SCEV *> Ops) {
  for (const auto *Op : Ops)
    // We do not expect that forgetting cached data for SCEVConstants will ever
    // open any prospects for sharpening or introduce any correctness issues,
    // so we don't bother storing their dependencies.
    if (!isa<SCEVConstant>(Op))
      SCEVUsers[Op].insert(User);
}
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
  const SCEV *Expr = SE.getSCEV(V);
  RewriteEntry &Entry = RewriteMap[Expr];

  // If we already have an entry and the version matches, return it.
  if (Entry.second && Generation == Entry.first)
    return Entry.second;

  // We found an entry but it's stale. Rewrite the stale entry
  // according to the current predicate.
  if (Entry.second)
    Expr = Entry.second;

  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, *Preds);
  Entry = {Generation, NewSCEV};

  return NewSCEV;
}

const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
  if (!BackedgeCount) {
    SmallVector<const SCEVPredicate *, 4> Preds;
    BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds);
    for (const auto *P : Preds)
      addPredicate(*P);
  }
  return BackedgeCount;
}

const SCEV *PredicatedScalarEvolution::getSymbolicMaxBackedgeTakenCount() {
  if (!SymbolicMaxBackedgeCount) {
    SmallVector<const SCEVPredicate *, 4> Preds;
    SymbolicMaxBackedgeCount =
        SE.getPredicatedSymbolicMaxBackedgeTakenCount(&L, Preds);
    for (const auto *P : Preds)
      addPredicate(*P);
  }
  return SymbolicMaxBackedgeCount;
}

unsigned PredicatedScalarEvolution::getSmallConstantMaxTripCount() {
  if (!SmallConstantMaxTripCount) {
    SmallVector<const SCEVPredicate *, 4> Preds;
    SmallConstantMaxTripCount = SE.getSmallConstantMaxTripCount(&L, &Preds);
    for (const auto *P : Preds)
      addPredicate(*P);
  }
  return *SmallConstantMaxTripCount;
}

void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
  if (Preds->implies(&Pred, SE))
    return;

  SmallVector<const SCEVPredicate *, 4> NewPreds(Preds->getPredicates());
  NewPreds.push_back(&Pred);
  Preds = std::make_unique<SCEVUnionPredicate>(NewPreds, SE);
  updateGeneration();
}
void PredicatedScalarEvolution::updateGeneration() {
  // If the generation number wrapped recompute everything.
  if (++Generation == 0) {
    for (auto &II : RewriteMap) {
      const SCEV *Rewritten = II.second.second;
      II.second = {Generation,
                   SE.rewriteUsingPredicate(Rewritten, &L, *Preds)};
    }
  }
}

void PredicatedScalarEvolution::setNoOverflow(
    Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
  const SCEV *Expr = getSCEV(V);
  const auto *AR = cast<SCEVAddRecExpr>(Expr);

  auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE);

  // Clear the statically implied flags.
  Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags);
  addPredicate(*SE.getWrapPredicate(AR, Flags));

  auto II = FlagsMap.insert({V, Flags});
  if (!II.second)
    II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second);
}

bool PredicatedScalarEvolution::hasNoOverflow(
    Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
  const SCEV *Expr = getSCEV(V);
  const auto *AR = cast<SCEVAddRecExpr>(Expr);

  Flags = SCEVWrapPredicate::clearFlags(
      Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE));

  auto II = FlagsMap.find(V);

  if (II != FlagsMap.end())
    Flags = SCEVWrapPredicate::clearFlags(Flags, II->second);

  return Flags == SCEVWrapPredicate::IncrementAnyWrap;
}

const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
  const SCEV *Expr = this->getSCEV(V);
  SmallVector<const SCEVPredicate *, 4> NewPreds;
  auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);

  if (!New)
    return nullptr;

  for (const auto *P : NewPreds)
    addPredicate(*P);

  RewriteMap[SE.getSCEV(V)] = {Generation, New};
  return New;
}

PredicatedScalarEvolution::PredicatedScalarEvolution(
    const PredicatedScalarEvolution &Init)
    : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L),
      Preds(std::make_unique<SCEVUnionPredicate>(Init.Preds->getPredicates(),
                                                 SE)),
      Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
  for (auto I : Init.FlagsMap)
    FlagsMap.insert(I);
}

void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
  // For each block.
  for (auto *BB : L.getBlocks())
    for (auto &I : *BB) {
      if (!SE.isSCEVable(I.getType()))
        continue;

      auto *Expr = SE.getSCEV(&I);
      auto II = RewriteMap.find(Expr);

      if (II == RewriteMap.end())
        continue;

      // Don't print things that are not interesting.
      if (II->second.second == Expr)
        continue;

      OS.indent(Depth) << "[PSE]" << I << ":\n";
      OS.indent(Depth + 2) << *Expr << "\n";
      OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
    }
}
// Match the mathematical pattern A - (A / B) * B, where A and B can be
// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
// for URem with constant power-of-2 second operands.
// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
// 4, A / B becomes X / 8).
bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
                                const SCEV *&RHS) {
  if (Expr->getType()->isPointerTy())
    return false;

  // Try to match 'zext (trunc A to iB) to iY', which is used
  // for URem with constant power-of-2 second operands. Make sure the size of
  // the operand A matches the size of the whole expressions.
  if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
    if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
      LHS = Trunc->getOperand();
      // Bail out if the type of the LHS is larger than the type of the
      // expression for now.
      if (getTypeSizeInBits(LHS->getType()) >
          getTypeSizeInBits(Expr->getType()))
        return false;
      if (LHS->getType() != Expr->getType())
        LHS = getZeroExtendExpr(LHS, Expr->getType());
      RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
                        << getTypeSizeInBits(Trunc->getType()));
      return true;
    }

  const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
  if (Add == nullptr || Add->getNumOperands() != 2)
    return false;

  const SCEV *A = Add->getOperand(1);
  const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));

  if (Mul == nullptr)
    return false;

  const auto MatchURemWithDivisor = [&](const SCEV *B) {
    // (SomeExpr + (-(SomeExpr / B) * B)).
    if (Expr == getURemExpr(A, B)) {
      LHS = A;
      RHS = B;
      return true;
    }
    return false;
  };

  // (SomeExpr + (-1 * (SomeExpr / B) * B)).
  if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
    return MatchURemWithDivisor(Mul->getOperand(1)) ||
           MatchURemWithDivisor(Mul->getOperand(2));

  // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
  if (Mul->getNumOperands() == 2)
    return MatchURemWithDivisor(Mul->getOperand(1)) ||
           MatchURemWithDivisor(Mul->getOperand(0)) ||
           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
  return false;
}

ScalarEvolution::LoopGuards
ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
  BasicBlock *Header = L->getHeader();
  BasicBlock *Pred = L->getLoopPredecessor();
  LoopGuards Guards(SE);
  if (!Pred)
    return Guards;
  SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
  collectFromBlock(SE, Guards, Header, Pred, VisitedBlocks);
  return Guards;
}
void ScalarEvolution::LoopGuards::collectFromPHI(
    ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
    const PHINode &Phi, SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
    SmallDenseMap<const BasicBlock *, LoopGuards> &IncomingGuards,
    unsigned Depth) {
  if (!SE.isSCEVable(Phi.getType()))
    return;

  using MinMaxPattern = std::pair<const SCEVConstant *, SCEVTypes>;
  auto GetMinMaxConst = [&](unsigned IncomingIdx) -> MinMaxPattern {
    const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx);
    if (!VisitedBlocks.insert(InBlock).second)
      return {nullptr, scCouldNotCompute};
    auto [G, Inserted] = IncomingGuards.try_emplace(InBlock, LoopGuards(SE));
    if (Inserted)
      collectFromBlock(SE, G->second, Phi.getParent(), InBlock, VisitedBlocks,
                       Depth + 1);
    auto &RewriteMap = G->second.RewriteMap;
    if (RewriteMap.empty())
      return {nullptr, scCouldNotCompute};
    auto S = RewriteMap.find(SE.getSCEV(Phi.getIncomingValue(IncomingIdx)));
    if (S == RewriteMap.end())
      return {nullptr, scCouldNotCompute};
    auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S->second);
    if (!SM)
      return {nullptr, scCouldNotCompute};
    if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(SM->getOperand(0)))
      return {C0, SM->getSCEVType()};
    return {nullptr, scCouldNotCompute};
  };
  auto MergeMinMaxConst = [](MinMaxPattern P1,
                             MinMaxPattern P2) -> MinMaxPattern {
    auto [C1, T1] = P1;
    auto [C2, T2] = P2;
    if (!C1 || !C2 || T1 != T2)
      return {nullptr, scCouldNotCompute};
    switch (T1) {
    case scUMaxExpr:
      return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1};
    case scSMaxExpr:
      return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1};
    case scUMinExpr:
      return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1};
    case scSMinExpr:
      return {C1->getAPInt().sgt(C2->getAPInt()) ? C1 : C2, T1};
    default:
      llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs.");
    }
  };
  auto P = GetMinMaxConst(0);
  for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) {
    if (!P.first)
      break;
    P = MergeMinMaxConst(P, GetMinMaxConst(In));
  }
  if (P.first) {
    const SCEV *LHS = SE.getSCEV(const_cast<PHINode *>(&Phi));
    SmallVector<const SCEV *, 2> Ops({P.first, LHS});
    const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops);
    Guards.RewriteMap.insert({LHS, RHS});
  }
}
void ScalarEvolution::LoopGuards::collectFromBlock(
    ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
    const BasicBlock *Block, const BasicBlock *Pred,
    SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks, unsigned Depth) {
  SmallVector<const SCEV *> ExprsToRewrite;
  auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
                              const SCEV *RHS,
                              DenseMap<const SCEV *, const SCEV *>
                                  &RewriteMap) {
    // WARNING: It is generally unsound to apply any wrap flags to the proposed
    // replacement SCEV which isn't directly implied by the structure of that
    // SCEV. In particular, using contextual facts to imply flags is *NOT*
    // legal. See the scoping rules for flags in the header to understand why.

    // If LHS is a constant, apply information to the other expression.
    if (isa<SCEVConstant>(LHS)) {
      std::swap(LHS, RHS);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }

    // Check for a condition of the form (-C1 + X < C2). InstCombine will
    // create this form when combining two checks of the form (X u< C2 + C1)
    // and (X >=u C1).
    auto MatchRangeCheckIdiom = [&SE, Predicate, LHS, RHS, &RewriteMap,
                                 &ExprsToRewrite]() {
      const SCEVConstant *C1;
      const SCEVUnknown *LHSUnknown;
      auto *C2 = dyn_cast<SCEVConstant>(RHS);
      if (!match(LHS,
                 m_scev_Add(m_SCEVConstant(C1), m_SCEVUnknown(LHSUnknown))) ||
          !C2)
        return false;

      auto ExactRegion =
          ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
              .sub(C1->getAPInt());

      // Bail out, unless we have a non-wrapping, monotonic range.
      if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
        return false;
      auto I = RewriteMap.find(LHSUnknown);
      const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
      RewriteMap[LHSUnknown] = SE.getUMaxExpr(
          SE.getConstant(ExactRegion.getUnsignedMin()),
          SE.getUMinExpr(RewrittenLHS,
                         SE.getConstant(ExactRegion.getUnsignedMax())));
      ExprsToRewrite.push_back(LHSUnknown);
      return true;
    };
    if (MatchRangeCheckIdiom())
      return;

    // Return true if \p Expr is a MinMax SCEV expression with a non-negative
    // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
    // the non-constant operand and in \p LHS the constant operand.
    auto IsMinMaxSCEVWithNonNegativeConstant =
        [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
            const SCEV *&RHS) {
          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
            if (MinMax->getNumOperands() != 2)
              return false;
            if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
              if (C->getAPInt().isNegative())
                return false;
              SCTy = MinMax->getSCEVType();
              LHS = MinMax->getOperand(0);
              RHS = MinMax->getOperand(1);
              return true;
            }
          }
          return false;
        };

    // Checks whether Expr is a non-negative constant, and Divisor is a
    // positive constant, and returns their APInt in ExprVal and in DivisorVal.
    auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor,
                                          APInt &ExprVal, APInt &DivisorVal) {
      auto *ConstExpr = dyn_cast<SCEVConstant>(Expr);
      auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
      if (!ConstExpr || !ConstDivisor)
        return false;
      ExprVal = ConstExpr->getAPInt();
      DivisorVal = ConstDivisor->getAPInt();
      return ExprVal.isNonNegative() && !DivisorVal.isNonPositive();
    };

    // Return a new SCEV that modifies \p Expr to the closest number that
    // divides by \p Divisor and is greater or equal than Expr.
    // For now, only handle constant Expr and Divisor.
    auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
                                           const SCEV *Divisor) {
      APInt ExprVal;
      APInt DivisorVal;
      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
        return Expr;
      APInt Rem = ExprVal.urem(DivisorVal);
      if (!Rem.isZero())
        // return the SCEV: Expr + Divisor - Expr % Divisor
        return SE.getConstant(ExprVal + DivisorVal - Rem);
      return Expr;
    };

    // Return a new SCEV that modifies \p Expr to the closest number that
    // divides by \p Divisor and is less or equal than Expr.
    // For now, only handle constant Expr and Divisor.
    auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
                                               const SCEV *Divisor) {
      APInt ExprVal;
      APInt DivisorVal;
      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
        return Expr;
      APInt Rem = ExprVal.urem(DivisorVal);
      // return the SCEV: Expr - Expr % Divisor
      return SE.getConstant(ExprVal - Rem);
    };

    // Apply divisibility by \p Divisor on MinMaxExpr with constant values,
    // recursively. This is done by aligning up/down the constant value to the
    // Divisor.
    std::function<const SCEV *(const SCEV *, const SCEV *)>
        ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr,
                                           const SCEV *Divisor) {
          const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
          SCEVTypes SCTy;
          if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
                                                   MinMaxRHS))
            return MinMaxExpr;
          auto IsMin =
              isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
          assert(SE.isKnownNonNegative(MinMaxLHS) &&
                 "Expected non-negative operand!");
          auto *DivisibleExpr =
              IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor)
                    : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor);
          SmallVector<const SCEV *> Ops = {
              ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
          return SE.getMinMaxExpr(SCTy, Ops);
        };

    // If we have LHS == 0, check if LHS is computing a property of some
    // unknown SCEV %v which we can rewrite %v to express explicitly.
    if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) {
      // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
      // explicitly express that.
      const SCEV *URemLHS = nullptr;
      const SCEV *URemRHS = nullptr;
      if (SE.matchURem(LHS, URemLHS, URemRHS)) {
        if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
          auto I = RewriteMap.find(LHSUnknown);
          const SCEV *RewrittenLHS =
              I != RewriteMap.end() ? I->second : LHSUnknown;
          RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
          const auto *Multiple =
              SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
          RewriteMap[LHSUnknown] = Multiple;
          ExprsToRewrite.push_back(LHSUnknown);
          return;
        }
      }
    }
15572// Puts rewrite rule \p From -> \p To into the rewrite map. Also if \p From 15573// and \p FromRewritten are the same (i.e. there has been no rewrite 15574// registered for \p From), then puts this value in the list of rewritten 15576auto AddRewrite = [&](
constSCEV *
From,
constSCEV *FromRewritten,
15578if (
From == FromRewritten)
15580 RewriteMap[
From] = To;
15583// Checks whether \p S has already been rewritten. In that case returns the 15584// existing rewrite because we want to chain further rewrites onto the 15585// already rewritten value. Otherwise returns \p S. 15586auto GetMaybeRewritten = [&](
constSCEV *S) {
15587autoI = RewriteMap.find(S);
15588returnI != RewriteMap.end() ?
I->second : S;
15591// Check for the SCEV expression (A /u B) * B while B is a constant, inside 15592// \p Expr. The check is done recuresively on \p Expr, which is assumed to 15593// be a composition of Min/Max SCEVs. Return whether the SCEV expression (A 15594// /u B) * B was found, and return the divisor B in \p DividesBy. For 15595// example, if Expr = umin (umax ((A /u 8) * 8, 16), 64), return true since 15596// (A /u 8) * 8 matched the pattern, and return the constant SCEV 8 in \p 15598 std::function<
bool(
constSCEV *,
constSCEV *&)> HasDivisibiltyInfo =
15599 [&](
constSCEV *Expr,
constSCEV *&DividesBy) {
15600if (
auto *
Mul = dyn_cast<SCEVMulExpr>(Expr)) {
15601if (
Mul->getNumOperands() != 2)
15603auto *MulLHS =
Mul->getOperand(0);
15604auto *MulRHS =
Mul->getOperand(1);
15605if (isa<SCEVConstant>(MulLHS))
15607if (
auto *Div = dyn_cast<SCEVUDivExpr>(MulLHS))
15608if (Div->getOperand(1) == MulRHS) {
15609 DividesBy = MulRHS;
15613if (
auto *
MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
15614return HasDivisibiltyInfo(
MinMax->getOperand(0), DividesBy) ||
15615 HasDivisibiltyInfo(
MinMax->getOperand(1), DividesBy);
15619// Return true if Expr known to divide by \p DividesBy. 15620 std::function<
bool(
constSCEV *,
constSCEV *&)> IsKnownToDivideBy =
15621 [&](
constSCEV *Expr,
constSCEV *DividesBy) {
15624if (
auto *
MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
15625return IsKnownToDivideBy(
MinMax->getOperand(0), DividesBy) &&
15626 IsKnownToDivideBy(
MinMax->getOperand(1), DividesBy);
15630constSCEV *RewrittenLHS = GetMaybeRewritten(LHS);
15631constSCEV *DividesBy =
nullptr;
15632if (HasDivisibiltyInfo(RewrittenLHS, DividesBy))
15633// Check that the whole expression is divided by DividesBy 15635 IsKnownToDivideBy(RewrittenLHS, DividesBy) ? DividesBy :
nullptr;
15637// Collect rewrites for LHS and its transitive operands based on the 15639// For min/max expressions, also apply the guard to its operands: 15640// 'min(a, b) >= c' -> '(a >= c) and (b >= c)', 15641// 'min(a, b) > c' -> '(a > c) and (b > c)', 15642// 'max(a, b) <= c' -> '(a <= c) and (b <= c)', 15643// 'max(a, b) < c' -> '(a < c) and (b < c)'. 15645// We cannot express strict predicates in SCEV, so instead we replace them 15646// with non-strict ones against plus or minus one of RHS depending on the 15657RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) :
RHS;
15663RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) :
RHS;
15667RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) :
RHS;
15671RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) :
RHS;
15680auto EnqueueOperands = [&Worklist](
constSCEVNAryExpr *S) {
15684while (!Worklist.
empty()) {
15686if (isa<SCEVConstant>(
From))
15690constSCEV *FromRewritten = GetMaybeRewritten(
From);
15691constSCEV *To =
nullptr;
15697if (
auto *
UMax = dyn_cast<SCEVUMaxExpr>(FromRewritten))
15698 EnqueueOperands(
UMax);
15703if (
auto *
SMax = dyn_cast<SCEVSMaxExpr>(FromRewritten))
15704 EnqueueOperands(
SMax);
15709if (
auto *
UMin = dyn_cast<SCEVUMinExpr>(FromRewritten))
15710 EnqueueOperands(
UMin);
15715if (
auto *
SMin = dyn_cast<SCEVSMinExpr>(FromRewritten))
15716 EnqueueOperands(
SMin);
15719if (isa<SCEVConstant>(RHS))
15724constSCEV *OneAlignedUp =
15725 DividesBy ? GetNextSCEVDividesByDivisor(One, DividesBy) : One;
15726 To = SE.
getUMaxExpr(FromRewritten, OneAlignedUp);
15734 AddRewrite(
From, FromRewritten, To);
15739// First, collect information from assumptions dominating the loop. 15743auto *AssumeI = cast<CallInst>(AssumeVH);
15749// Second, collect information from llvm.experimental.guards dominating the loop. 15751 SE.F.
getParent(), Intrinsic::experimental_guard);
15753for (
constauto *GU : GuardDecl->users())
15754if (
constauto *Guard = dyn_cast<IntrinsicInst>(GU))
15755if (Guard->getFunction() ==
Block->getParent() &&
15759// Third, collect conditions from dominating branches. Starting at the loop 15760// predecessor, climb up the predecessor chain, as long as there are 15761// predecessors that can be found that have unique successors leading to the 15763// TODO: share this logic with isLoopEntryGuardedByCond. 15764unsigned NumCollectedConditions = 0;
15766 std::pair<const BasicBlock *, const BasicBlock *> Pair(Pred,
Block);
15768 Pair = SE.getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
15769 VisitedBlocks.
insert(Pair.second);
15771 dyn_cast<BranchInst>(Pair.first->getTerminator());
15777 NumCollectedConditions++;
15779// If we are recursively collecting guards stop after 2 15780// conditions to limit compile-time impact for now. 15781if (
Depth > 0 && NumCollectedConditions == 2)
15784// Finally, if we stopped climbing the predecessor chain because 15785// there wasn't a unique one to continue, try to collect conditions 15786// for PHINodes by recursively following all of their incoming 15787// blocks and try to merge the found conditions to build a new one 15789if (Pair.second->hasNPredecessorsOrMore(2) &&
15792for (
auto &Phi : Pair.second->phis())
15793 collectFromPHI(SE, Guards, Phi, VisitedBlocks, IncomingGuards,
Depth);
15796// Now apply the information from the collected conditions to 15797// Guards.RewriteMap. Conditions are processed in reverse order, so the 15798// earliest conditions is processed first. This ensures the SCEVs with the 15799// shortest dependency chains are constructed first. 15800for (
auto [Term, EnterIfTrue] :
reverse(Terms)) {
15804while (!Worklist.
empty()) {
15809if (
auto *Cmp = dyn_cast<ICmpInst>(
Cond)) {
15811 EnterIfTrue ?
Cmp->getPredicate() :
Cmp->getInversePredicate();
15814 CollectCondition(
Predicate, LHS, RHS, Guards.RewriteMap);
15827// Let the rewriter preserve NUW/NSW flags if the unsigned/signed ranges of 15828// the replacement expressions are contained in the ranges of the replaced 15830 Guards.PreserveNUW =
true;
15831 Guards.PreserveNSW =
true;
15832for (
constSCEV *Expr : ExprsToRewrite) {
15833constSCEV *RewriteTo = Guards.RewriteMap[Expr];
15834 Guards.PreserveNUW &=
15836 Guards.PreserveNSW &=
15840// Now that all rewrite information is collect, rewrite the collected 15841// expressions with the information in the map. This applies information to 15843if (ExprsToRewrite.size() > 1) {
15844for (
constSCEV *Expr : ExprsToRewrite) {
15845constSCEV *RewriteTo = Guards.RewriteMap[Expr];
15846 Guards.RewriteMap.erase(Expr);
15847 Guards.RewriteMap.insert({Expr, Guards.
rewrite(RewriteTo)});
15853 /// A rewriter to replace SCEV expressions in Map with the corresponding entry 15854 /// in the map. It skips AddRecExpr because we cannot guarantee that the 15855 /// replacement is loop invariant in the loop of the AddRec. 15856classSCEVLoopGuardRewriter
15866if (Guards.PreserveNUW)
15868if (Guards.PreserveNSW)
15875autoI = Map.find(Expr);
15882autoI = Map.find(Expr);
15883if (
I == Map.end()) {
15884// If we didn't find the extact ZExt expr in the map, check if there's 15885// an entry for a smaller ZExt we can use instead. 15889while (Bitwidth % 8 == 0 && Bitwidth >= 8 &&
15890 Bitwidth >
Op->getType()->getScalarSizeInBits()) {
15893autoI = Map.find(NarrowExt);
15896 Bitwidth = Bitwidth / 2;
15906autoI = Map.find(Expr);
15914autoI = Map.find(Expr);
15921autoI = Map.find(Expr);
15929bool Changed =
false;
15935// We are only replacing operands with equivalent values, so transfer the 15936// flags from the original expression. 15937return !Changed ? Expr
15945bool Changed =
false;
15951// We are only replacing operands with equivalent values, so transfer the 15952// flags from the original expression. 15953return !Changed ? Expr
15960if (RewriteMap.empty())
15963 SCEVLoopGuardRewriter
Rewriter(SE, *
this);
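// Illustrative sketch (not from the LLVM sources) of the narrower-ZExt
// fallback above: zero-extension composes, so if the map has an entry for a
// ZExt to a narrower type, extending the rewritten result again yields the
// wide value. The values below are hypothetical.
LLVM_ATTRIBUTE_UNUSED static uint64_t demoComposedZeroExtend(uint16_t X) {
  const uint32_t Narrow = static_cast<uint32_t>(X);    // zext i16 -> i32
  const uint64_t Wide = static_cast<uint64_t>(Narrow); // zext i32 -> i64
  assert(Wide == static_cast<uint64_t>(X)); // same as the direct zext i16->i64
  return Wide;
}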
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
block Block Frequency Analysis
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
This file defines a hash set that can be used to remove duplication of nodes in a graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PowerPC Reduce CR logical Operation
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
This file provides utility classes that use RAII to save and restore values.
bool SCEVMinMaxExprContains(const SCEV *Root, const SCEV *OperandToFind, SCEVTypes RootKind)
static cl::opt< unsigned > MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, cl::desc("Max coefficients in AddRec during evolving"), cl::init(8))
static cl::opt< unsigned > RangeIterThreshold("scev-range-iter-threshold", cl::Hidden, cl::desc("Threshold for switching to iteratively computing SCEV ranges"), cl::init(32))
static const Loop * isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI)
static unsigned getConstantTripCount(const SCEVConstant *ExitCount)
static int CompareValueComplexity(const LoopInfo *const LI, Value *LV, Value *RV, unsigned Depth)
Compare the two values LV and RV in terms of their "complexity" where "complexity" is a partial (and ...
static void PushLoopPHIs(const Loop *L, SmallVectorImpl< Instruction * > &Worklist, SmallPtrSetImpl< Instruction * > &Visited)
Push PHI nodes in the header of the given loop onto the given Worklist.
static void insertFoldCacheEntry(const ScalarEvolution::FoldID &ID, const SCEV *S, DenseMap< ScalarEvolution::FoldID, const SCEV * > &FoldCache, DenseMap< const SCEV *, SmallVector< ScalarEvolution::FoldID, 2 > > &FoldCacheUser)
static cl::opt< bool > ClassifyExpressions("scalar-evolution-classify-expressions", cl::Hidden, cl::init(true), cl::desc("When printing analysis, include information on every instruction"))
static bool CanConstantFold(const Instruction *I)
Return true if we can constant fold an instruction of the specified type, assuming that all operands ...
static cl::opt< unsigned > AddOpsInlineThreshold("scev-addops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining addition operands into a SCEV"), cl::init(500))
static cl::opt< unsigned > MaxLoopGuardCollectionDepth("scalar-evolution-max-loop-guard-collection-depth", cl::Hidden, cl::desc("Maximum depth for recursive loop guard collection"), cl::init(1))
static cl::opt< bool > VerifyIR("scev-verify-ir", cl::Hidden, cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"), cl::init(false))
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, Value *&C, Value *&LHS, Value *&RHS)
static std::optional< int > CompareSCEVComplexity(EquivalenceClasses< const SCEV * > &EqCacheSCEV, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, DominatorTree &DT, unsigned Depth=0)
static const SCEV * getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE, unsigned Depth)
static std::optional< APInt > MinOptional(std::optional< APInt > X, std::optional< APInt > Y)
Helper function to compare optional APInts: (a) if X and Y both exist, return min(X,...
static cl::opt< unsigned > MulOpsInlineThreshold("scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(32))
static void GroupByComplexity(SmallVectorImpl< const SCEV * > &Ops, LoopInfo *LI, DominatorTree &DT)
Given a list of SCEV objects, order them by their complexity, and group objects of the same complexit...
static const SCEV * constantFoldAndGroupOps(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, SmallVectorImpl< const SCEV * > &Ops, FoldT Fold, IsIdentityT IsIdentity, IsAbsorberT IsAbsorber)
Performs a number of common optimizations on the passed Ops.
static std::optional< const SCEV * > createNodeForSelectViaUMinSeq(ScalarEvolution *SE, const SCEV *CondExpr, const SCEV *TrueExpr, const SCEV *FalseExpr)
static Constant * BuildConstantFromSCEV(const SCEV *V)
This builds up a Constant using the ConstantExpr interface.
static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE)
static const SCEV * BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, Type *ResultTy)
Compute BC(It, K). The result has width W. Assume, K > 0.
static cl::opt< unsigned > MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), cl::init(8))
static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, const SCEV *Candidate)
Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
static PHINode * getConstantEvolvingPHI(Value *V, const Loop *L)
getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node in the loop that V is deri...
static cl::opt< unsigned > MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), cl::init(100))
static bool MatchBinarySub(const SCEV *S, const SCEV *&LHS, const SCEV *&RHS)
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow)
static void PrintSCEVWithTypeHint(raw_ostream &OS, const SCEV *S)
When printing a top-level SCEV for trip counts, it's helpful to include a type for constants which ar...
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, const Loop *L)
static bool containsConstantInAddMulChain(const SCEV *StartExpr)
Determine if any of the operands in this SCEV are a constant or if any of the add or multiply express...
static const SCEV * getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE, unsigned Depth)
static bool hasHugeExpression(ArrayRef< const SCEV * > Ops)
Returns true if Ops contains a huge SCEV (the subtree of S contains at least HugeExprThreshold nodes)...
static cl::opt< unsigned > MaxPhiSCCAnalysisSize("scalar-evolution-max-scc-analysis-depth", cl::Hidden, cl::desc("Maximum amount of nodes to process while searching SCEVUnknown " "Phi strongly connected components"), cl::init(8))
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
static cl::opt< unsigned > MaxSCEVOperationsImplicationDepth("scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV operations implication analysis"), cl::init(2))
static void PushDefUseChildren(Instruction *I, SmallVectorImpl< Instruction * > &Worklist, SmallPtrSetImpl< Instruction * > &Visited)
Push users of the given Instruction onto the given Worklist.
static std::optional< APInt > SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, const ConstantRange &Range, ScalarEvolution &SE)
Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n iterations.
static cl::opt< bool > UseContextForNoWrapFlagInference("scalar-evolution-use-context-for-no-wrap-flag-strenghening", cl::Hidden, cl::desc("Infer nuw/nsw flags using context where suitable"), cl::init(true))
static cl::opt< bool > EnableFiniteLoopControl("scalar-evolution-finite-loop", cl::Hidden, cl::desc("Handle <= and >= in finite loops"), cl::init(true))
static std::optional< std::tuple< APInt, APInt, APInt, APInt, unsigned > > GetQuadraticEquation(const SCEVAddRecExpr *AddRec)
For a given quadratic addrec, generate coefficients of the corresponding quadratic equation,...
static bool isKnownPredicateExtendIdiom(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
static std::optional< BinaryOp > MatchBinaryOp(Value *V, const DataLayout &DL, AssumptionCache &AC, const DominatorTree &DT, const Instruction *CxtI)
Try to map V into a BinaryOp, and return std::nullopt on failure.
static std::optional< APInt > SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE)
Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n iterations.
static std::optional< APInt > TruncIfPossible(std::optional< APInt > X, unsigned BitWidth)
Helper function to truncate an optional APInt to a given BitWidth.
static cl::opt< unsigned > MaxSCEVCompareDepth("scalar-evolution-max-scev-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV complexity comparisons"), cl::init(32))
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE, const SCEVConstant *ConstantTerm, const SCEVAddExpr *WholeAddExpr)
static cl::opt< unsigned > MaxConstantEvolvingDepth("scalar-evolution-max-constant-evolving-depth", cl::Hidden, cl::desc("Maximum depth of recursive constant evolving"), cl::init(32))
static ConstantRange getRangeForAffineARHelper(APInt Step, const ConstantRange &StartRange, const APInt &MaxBECount, bool Signed)
static std::optional< ConstantRange > GetRangeFromMetadata(Value *V)
Helper method to assign a range to V from metadata present in the IR.
static const SCEV * SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, SmallVectorImpl< const SCEVPredicate * > *Predicates, ScalarEvolution &SE)
Finds the minimum unsigned root of the following equation: A * X = B (mod N), where N = 2^BW and BW is the common bit width of A and B.
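A hedged sketch of the core case: when A is odd, A has a multiplicative inverse modulo 2^BW, so the minimum root is simply B * A^{-1}. The helper name below is hypothetical; the real routine additionally cancels a common power of two from A and B (possibly emitting predicates) before doing this:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

static APInt solveLinEqOddA(const APInt &A, const APInt &B) {
  assert(A[0] && "A must be odd so that A^-1 mod 2^BW exists");
  return B * A.multiplicativeInverse(); // X with A*X == B (mod 2^BW)
}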
static cl::opt< unsigned > HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden, cl::desc("Size of the expression which is considered huge"), cl::init(4096))
static Type * isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, bool &Signed, ScalarEvolution &SE)
Helper function to createAddRecFromPHIWithCasts.
static Constant * EvaluateExpression(Value *V, const Loop *L, DenseMap< Instruction *, Constant * > &Vals, const DataLayout &DL, const TargetLibraryInfo *TLI)
EvaluateExpression - Given an expression that passes the getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node in the expression has the value PHIVal.
static const SCEV * MatchNotExpr(const SCEV *Expr)
If Expr computes ~A, return A else return nullptr.
static cl::opt< unsigned > MaxValueCompareDepth("scalar-evolution-max-value-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive value complexity comparisons"), cl::init(2))
static cl::opt< bool, true > VerifySCEVOpt("verify-scev", cl::Hidden, cl::location(VerifySCEV), cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"))
static const SCEV * getSignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE)
static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const ArrayRef< const SCEV * > Ops, SCEV::NoWrapFlags Flags)
static cl::opt< unsigned > MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, cl::desc("Maximum depth of recursive arithmetics"), cl::init(32))
static bool HasSameValue(const SCEV *A, const SCEV *B)
SCEV structural equivalence is usually sufficient for testing whether two expressions are equal,...
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow)
Compute the result of "n choose k", the binomial coefficient.
static bool CollectAddOperandsWithScales(SmallDenseMap< const SCEV *, APInt, 16 > &M, SmallVectorImpl< const SCEV * > &NewOps, APInt &AccumulatedConstant, ArrayRef< const SCEV * > Ops, const APInt &Scale, ScalarEvolution &SE)
Process the given Ops list, which is a list of operands to be added under the given scale, update the given map.
static bool canConstantEvolve(Instruction *I, const Loop *L)
Determine whether this instruction can constant evolve within this loop assuming its operands can all constant evolve.
static PHINode * getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, DenseMap< Instruction *, PHINode * > &PHIMap, unsigned Depth)
getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by recursing through each instruction operand until reaching a loop header phi.
static bool scevUnconditionallyPropagatesPoisonFromOperands(SCEVTypes Kind)
static cl::opt< bool > VerifySCEVStrict("verify-scev-strict", cl::Hidden, cl::desc("Enable stricter verification with -verify-scev is passed"))
static Constant * getOtherIncomingValue(PHINode *PN, BasicBlock *BB)
static cl::opt< bool > UseExpensiveRangeSharpening("scalar-evolution-use-expensive-range-sharpening", cl::Hidden, cl::init(false), cl::desc("Use more powerful methods of sharpening expression ranges. May " "be costly in terms of compile time"))
static const SCEV * getUnsignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE)
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Is LHS Pred RHS true by virtue of LHS or RHS being a Min or Max expression?
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope exit.
static bool InBlock(const Value *V, const BasicBlock *BB)
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
APInt umul_ov(const APInt &RHS, bool &Overflow) const
APInt udiv(const APInt &RHS) const
Unsigned division operation.
APInt zext(unsigned width) const
Zero extend to a new width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
APInt getHiBits(unsigned numBits) const
Compute an APInt containing numBits high bits from this APInt.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for a specific bit width.
APInt abs() const
Get the absolute value.
bool sgt(const APInt &RHS) const
Signed greater than comparison.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
bool sle(const APInt &RHS) const
Signed less or equal comparison.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned countTrailingZeros() const
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
APInt multiplicativeInverse() const
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
bool isSignBitSet() const
Determine if sign bit of this APInt is set.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
This templated class represents "all analyses that operate over <a particular IR unit>" (e.g. a function or a loop).
API to communicate dependencies between analyses during invalidation.
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was in fact invalidated.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
MutableArrayRef< ResultElem > assumptions()
Access the list of assumption handles currently tracked for this function.
bool isSingleEdge() const
Check if this is the only edge between Start and End.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Instruction & front() const
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
unsigned getNoWrapKind() const
Returns one of OBO::NoSignedWrap or OBO::NoUnsignedWrap.
Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
LLVM_ATTRIBUTE_RETURNS_NONNULL void * Allocate(size_t Size, Align Alignment)
Allocate space at the specified alignment.
This class represents a function call, abstracting a target machine's calling convention.
Value handle with callbacks on RAUW and destruction.
bool isFalseWhenEqual() const
This is just a convenience.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isTrueWhenEqual() const
This is just a convenience.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
static Constant * getNot(Constant *C)
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static Constant * getNeg(Constant *C, bool HasNSW=false)
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
const APInt & getValue() const
Return the constant as an APInt value reference.
static ConstantInt * getBool(LLVMContext &Context, bool V)
This class represents a range of values.
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
PreferredRangeType
If represented precisely, the result of some range operations may consist of multiple disjoint ranges.
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
ConstantRange truncate(uint32_t BitWidth) const
Return a new range in the specified integer type, which must be strictly smaller than the current type.
bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other? NOTE: false does not mean that inverse predicate holds!
bool isEmptySet() const
Return true if this set contains no members.
ConstantRange zeroExtend(uint32_t BitWidth) const
Return a new range in the specified integer type, which must be strictly larger than the current type.
bool isSignWrappedSet() const
Return true if this set wraps around the signed domain.
APInt getSignedMin() const
Return the smallest signed value contained in the ConstantRange.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
void print(raw_ostream &OS) const
Print out the bounds to a stream.
ConstantRange signExtend(uint32_t BitWidth) const
Return a new range in the specified integer type, which must be strictly larger than the current type.
const APInt & getUpper() const
Return the upper value for this range.
ConstantRange unionWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the union of this range with another range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with any value contained within Other.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
APInt getSignedMax() const
Return the largest signed value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
static ConstantRange makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, const ConstantRange &Other, unsigned NoWrapKind)
Produce the largest range containing all X such that "X BinOp Y" is guaranteed not to wrap (overflow) for all Y in Other.
unsigned getMinSignedBits() const
Compute the maximal number of bits needed to represent every value in this signed range.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
ConstantRange sub(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a subtraction of a value in this r...
ConstantRange sextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
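An illustrative sketch of how these ConstantRange helpers compose, in the way the range-analysis code above uses them (function name hypothetical):

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

void rangeExample() {
  // All 8-bit X with X <u 42, i.e. the half-open range [0, 42).
  ConstantRange R =
      ConstantRange::makeExactICmpRegion(CmpInst::ICMP_ULT, APInt(8, 42));
  bool In = R.contains(APInt(8, 41));    // true
  bool Out = R.contains(APInt(8, 42));   // false
  ConstantRange Wide = R.zeroExtend(16); // same values, now 16 bits wide
  (void)In; (void)Out; (void)Wide;
}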
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of its fields.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples: i1 -> 1, i8 -> 8, i19 -> 19, i36 -> 36.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
iterator find_as(const LookupKeyT &Val)
Alternate version of find() which allows a different, and possibly less expensive, key type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient operations: insert an element into a class of its own, union two classes, and find the class for a given element.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not already exist.
bool isEquivalent(const ElemTy &V1, const ElemTy &V2) const
FoldingSetNodeIDRef - This class describes a reference to an interned FoldingSetNodeID, which can be useful to store node id data rather than using plain FoldingSetNodeIDs.
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
FunctionPass class - This class is used to implement most global optimizations.
const BasicBlock & getEntryBlock() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Represents flags for the getelementptr instruction/expression.
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
static GEPNoWrapFlags none()
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
Module * getParent()
Get the module that this global value is contained inside of...
static bool isPrivateLinkage(LinkageTypes Linkage)
static bool isInternalLinkage(LinkageTypes Linkage)
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
static bool isGE(Predicate P)
Return true if the predicate is SGE or UGE.
CmpPredicate getSwappedCmpPredicate() const
static bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
CmpPredicate getInverseCmpPredicate() const
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
Predicate getFlippedSignednessPredicate() const
For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->EQ.
static CmpPredicate getInverseCmpPredicate(CmpPredicate Pred)
bool isEquality() const
Return true if this predicate is either EQ or NE.
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
static bool isLE(Predicate P)
Return true if the predicate is SLE or ULE.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags, which may specify conditions under which the instruction's result is undefined.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
A Module instance is used to store all the information related to an LLVM module.
This is a utility class that provides an abstraction for the common functionality between Instructions and ConstantExprs.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application-load time.
PointerIntPair - This class implements a pair of a pointer and small integer.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
const SCEVPredicate & getPredicate() const
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
void print(raw_ostream &OS, unsigned Depth) const
Print the SCEV mappings done by the Predicated Scalar Evolution.
bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L)
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
unsigned getSmallConstantMaxTripCount()
Returns the upper bound of the loop trip count as a normal unsigned value, or 0 if the trip count is unknown.
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
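A short usage sketch (the helper is hypothetical; PSE and V are assumed to be provided by a client such as a vectorizer):

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

const SCEVAddRecExpr *tryGetAddRec(PredicatedScalarEvolution &PSE, Value *V) {
  // May add run-time-checkable predicates; the result is only valid
  // under PSE.getPredicate().
  return PSE.getAsAddRec(V);
}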
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
constexpr bool isValid() const
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void setNoWrapFlags(NoWrapFlags Flags)
Set flags for a recurrence without clearing any previously set flags.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
bool isQuadratic() const
Return true if this represents an expression A + B*x + C*x^2 where A, B and C are loop invariant values.
const SCEV * getNumIterationsInRange(const ConstantRange &Range, ScalarEvolution &SE) const
Return the number of iterations of this loop that produce values in the specified constant range.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
const Loop * getLoop() const
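A sketch tying these members together: build the canonical induction variable {0,+,1}<L> and ask for its value after It iterations (helper name hypothetical; SE, L and It come from the surrounding pass):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

const SCEV *valueAfterIt(ScalarEvolution &SE, const Loop *L, const SCEV *It) {
  Type *Ty = It->getType();
  const SCEV *IndVar =
      SE.getAddRecExpr(SE.getZero(Ty), SE.getOne(Ty), L, SCEV::FlagAnyWrap);
  // {0,+,1}<L> at iteration It folds to It via the chrec evaluation rule.
  return cast<SCEVAddRecExpr>(IndVar)->evaluateAtIteration(It, SE);
}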
This is the base class for unary cast operator classes.
const SCEV * getOperand() const
SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, const SCEV *op, Type *ty)
void setNoWrapFlags(NoWrapFlags Flags)
Set flags for a non-recurrence without clearing previously set flags.
This class represents an assumption that the expression LHS Pred RHS evaluates to true, and this can be checked at run-time.
SCEVComparePredicate(const FoldingSetNodeIDRef ID, const ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
bool isAlwaysTrue() const override
Returns true if the predicate is always true.
void print(raw_ostream &OS, unsigned Depth=0) const override
Prints a textual representation of this predicate with an indentation of Depth.
bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const override
Implementation of the SCEVPredicate interface.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This is the base class for unary integral cast operator classes.
SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, const SCEV *op, Type *ty)
This node is the base class for min/max selections.
static enum SCEVTypes negate(enum SCEVTypes T)
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSelfWrap() const
size_t getNumOperands() const
bool hasNoSignedWrap() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
const SCEV *const * Operands
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicate(const SCEVPredicate &)=default
virtual bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const =0
Returns true if this predicate implies N.
virtual void print(raw_ostream &OS, unsigned Depth=0) const =0
Prints a textual representation of this predicate with an indentation of Depth.
This class represents a cast from a pointer to a pointer-sized integer value.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visitSignExtendExpr(const SCEVSignExtendExpr *Expr)
const SCEV * visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr)
const SCEV * visitSMinExpr(const SCEVSMinExpr *Expr)
const SCEV * visitUMinExpr(const SCEVUMinExpr *Expr)
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This node is the base class for sequential/in-order min/max selections.
SCEVTypes getEquivalentNonSequentialSCEVType() const
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
Visit all nodes in the expression tree using worklist traversal.
void visitAll(const SCEV *Root)
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will interact with.
void print(raw_ostream &OS, unsigned Depth) const override
Prints a textual representation of this predicate with an indentation of Depth.
bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const override
Returns true if this predicate implies N.
SCEVUnionPredicate(ArrayRef< const SCEVPredicate * > Preds, ScalarEvolution &SE)
Union predicates don't get cached so create a dummy set ID for it.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM Value.
This class represents the value of vscale, as used when defining the length of a scalable vector or returned by the llvm.vscale() intrinsic.
This class represents an assumption made on an AddRec expression.
IncrementWrapFlags
Similar to SCEV::NoWrapFlags, but with slightly different semantics for FlagNUSW.
SCEVWrapPredicate(const FoldingSetNodeIDRef ID, const SCEVAddRecExpr *AR, IncrementWrapFlags Flags)
bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const override
Returns true if this predicate implies N.
static SCEVWrapPredicate::IncrementWrapFlags setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OnFlags)
void print(raw_ostream &OS, unsigned Depth=0) const override
Prints a textual representation of this predicate with an indentation of Depth.
bool isAlwaysTrue() const override
Returns true if the predicate is always true.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
static SCEVWrapPredicate::IncrementWrapFlags clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OffFlags)
Convenient IncrementWrapFlags manipulation methods.
static SCEVWrapPredicate::IncrementWrapFlags getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Returns the set of SCEVWrapPredicate no wrap flags implied by a SCEVAddRecExpr.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
void dump() const
This method is used for debugging.
bool isAllOnesValue() const
Return true if the expression is a constant all-ones value.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
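Under the new pass manager, clients obtain the analysis result like this (the pass below is hypothetical, not part of LLVM):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct MySCEVConsumerPass : PassInfoMixin<MySCEVConsumerPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
    (void)SE; // query trip counts, ranges, dispositions, ...
    return PreservedAnalyses::all();
  }
};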
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
void print(raw_ostream &OS, const Module *=nullptr) const override
print - Print out the internal state of the pass.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memory when it is no longer needed.
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by an analysis pass to check the state of analysis information.
static LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be preserved during rewriting.
const SCEV * rewrite(const SCEV *Expr) const
Try to apply the collected loop guards to Expr.
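Typical usage sketch (helper name hypothetical): collect the guards once and reuse them for several rewrites; applyLoopGuards below is the one-shot convenience wrapper over the same pair.

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

const SCEV *sharpen(ScalarEvolution &SE, const Loop *L, const SCEV *Expr) {
  auto Guards = ScalarEvolution::LoopGuards::collect(L, SE);
  return Guards.rewrite(Expr); // Expr with loop-guard facts applied
}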
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e. an upper bound of) the exact backedge-taken count.
static bool hasFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags TestFlags)
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
bool isKnownOnEveryIteration(CmpPredicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS)
Test if the condition described by Pred, LHS, RHS is known to be true on every iteration of the loop of the recurrency LHS.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterationsImpl(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getUDivCeilSCEV(const SCEV *N, const SCEV *D)
Compute ceil(N / D).
const SCEV * getGEPExpr(GEPOperator *GEP, const SmallVectorImpl< const SCEV * > &IndexExprs)
Returns an expression for a GEP.
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context during at least the first MaxIter iterations, return a LoopInvariantPredicate.
Type * getWiderType(Type *Ty1, Type *Ty2) const
const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
const SCEV * getURemExpr(const SCEV *LHS, const SCEV *RHS)
Represents an unsigned remainder expression based on unsigned division.
APInt getConstantMultiple(const SCEV *S)
Returns the max constant multiple of S.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getPredicatedConstantMaxBackedgeTakenCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Predicates)
Similar to getConstantMaxBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are required to be true in order for the answer to be correct.
const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags)
Update no-wrap flags of an AddRec.
const SCEV * getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS)
Promote the operands to the wider of the types using zero-extension, and then perform a umax operation with them.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Check whether the operation BinOp between LHS and RHS provably does not overflow; Signed selects between signed and unsigned overflow.
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition were a conditional branch of ExitCond.
const SCEV * getZeroExtendExprImpl(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value, if possible.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getPredicatedBackedgeTakenCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Predicates)
Similar to getBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are required to be true in order for the answer to be correct.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool loopHasNoAbnormalExits(const Loop *L)
Return true if the loop has no abnormal exits.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which cannot result in overflow.
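Sketch of the relationship it encodes (helper name hypothetical): trip count = backedge-taken count + 1, evaluated in a type wide enough that the +1 cannot wrap.

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

const SCEV *tripCount(ScalarEvolution &SE, const Loop *L) {
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BTC))
    return BTC;
  return SE.getTripCountFromExitCount(BTC); // BTC + 1 in an overflow-safe type
}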
ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI)
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getCastExpr(SCEVTypes Kind, const SCEV *Op, Type *Ty)
const SCEV * getSequentialMinMaxExpr(SCEVTypes Kind, SmallVectorImpl< const SCEV * > &Operands)
const SCEV * getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth=0)
std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCount or zero.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect ScalarEvolution's ability to compute a trip count.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
APInt getUnsignedRangeMin(const SCEV *S)
Determine the min of the unsigned range for a particular SCEV.
bool SimplifyICmpOperands(CmpPredicate &Pred, const SCEV *&LHS, const SCEV *&RHS, unsigned Depth=0)
Simplify LHS and RHS in a comparison with predicate Pred.
const SCEV * getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo)
Return an expression for offsetof on the given field with type IntTy.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
const SCEV * getSignExtendExprImpl(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool hasOperand(const SCEV *S, const SCEV *Op) const
Test whether the given SCEV has Op as a direct or indirect operand.
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEVPredicate * getComparePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
const SCEV * getNotSCEV(const SCEV *V)
Return the SCEV object corresponding to ~V.
std::optional< LoopInvariantPredicate > getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI=nullptr)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L, return a LoopInvariantPredicate with LHS and RHS being invariants.
bool instructionCouldExistWithOperands(const SCEV *A, const SCEV *B)
Return true if there exists a point in the program at which both A and B could be operands to the same instruction.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
uint32_t getMinTrailingZeros(const SCEV *S)
Determine the minimum number of zero bits that S is guaranteed to end in (at every loop iteration).
void print(raw_ostream &OS) const
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
const SCEV * getPredicatedExitCount(const Loop *L, const BasicBlock *ExitingBlock, SmallVectorImpl< const SCEVPredicate * > *Predicates, ExitCountKind Kind=Exact)
Same as above except this uses the predicated backedge taken info and may require predicates.
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its value, or if the value is deleted.
APInt getSignedRangeMin(const SCEV *S)
Determine the min of the signed range for a particular SCEV.
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
@ MonotonicallyDecreasing
@ MonotonicallyIncreasing
const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags)
bool isLoopBackedgeGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether the backedge of the loop is protected by a conditional between LHS and RHS.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
APInt getNonZeroConstantMultiple(const SCEV *S)
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
static SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OnFlags)
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB)
Return the "disposition" of the given SCEV with respect to the given block.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
bool loopIsFiniteByAssumption(const Loop *L)
Return true if this loop is finite by assumption.
const SCEV * getExistingSCEV(Value *V)
Return an existing SCEV for V if there is one, otherwise return nullptr.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopComputable
The SCEV varies predictably with the loop.
@ LoopVariant
The SCEV is loop-variant (unknown).
@ LoopInvariant
The SCEV is loop-invariant.
friend class SCEVCallbackVH
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero=false, bool OrNegative=false)
Test if the given expression is known to be a power of 2.
std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags, and deduce flags that aren't known yet.
void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be in LCSSA form.
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
std::optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing, returns Some(MonotonicallyIncreasing) and Some(MonotonicallyDecreasing) respectively.
const SCEV * getCouldNotCompute()
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
BlockDisposition
An enum describing the relationship between a SCEV and a basic block.
@ DominatesBlock
The SCEV dominates the block.
@ ProperlyDominatesBlock
The SCEV properly dominates the block.
@ DoesNotDominateBlock
The SCEV does not dominate the block.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly computable, return SCEVCouldNotCompute.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
void getPoisonGeneratingValues(SmallPtrSetImpl< const Value * > &Result, const SCEV *S)
Return the set of Values that, if poison, will definitively result in S being poison as well.
void forgetLoopDispositions()
Called when the client has changed the disposition of values in this loop.
const SCEV * getVScale(Type *Ty)
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
const SCEV * getMinMaxExpr(SCEVTypes Kind, SmallVectorImpl< const SCEV * > &Operands)
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV dominate the specified basic block.
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
ExitCountKind
The terms "backedge taken count" and "exit count" are used interchangeably to refer to the number of ...
@ SymbolicMaximum
An expression which provides an upper bound on the exact trip count.
@ ConstantMaximum
A constant which provides an upper bound on the exact trip count.
@ Exact
An expression exactly describing the number of times the backedge has executed when a loop is exited.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEVAddRecExpr * convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Preds)
Tries to convert the S expression to an AddRec expression, adding additional predicates to Preds as r...
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
std::optional< bool > evaluatePredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
const SCEV * getUnknown(Value *V)
std::optional< std::pair< const SCEV *, SmallVector< const SCEVPredicate *, 3 > > > createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI)
Checks if SymbolicPHI can be rewritten as an AddRecExpr under some Predicates.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getElementCount(Type *Ty, ElementCount EC)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution namespace.
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn't.
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
const SCEV * rewriteUsingPredicate(const SCEV *S, const Loop *L, const SCEVPredicate &A)
Re-writes the SCEV according to the Predicates in A.
std::pair< const SCEV *, const SCEV * > SplitIntoInitAndPostInc(const Loop *L, const SCEV *S)
Splits SCEV expression S into two SCEVs.
bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS, and RHS in the given Context.
const SCEV * getPredicatedSymbolicMaxBackedgeTakenCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Predicates)
Similar to getSymbolicMaxBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are required to be true in order for the answer to be correct.
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
void registerUser(const SCEV *User, ArrayRef< const SCEV * > Ops)
Notify this ScalarEvolution that User directly uses SCEVs in Ops.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool isBasicBlockEntryGuardedByCond(const BasicBlock *BB, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the basic block is protected by a conditional between LHS and RHS.
const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS, and RHS.
bool isKnownViaInduction(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
We'd like to check the predicate on every iteration of the most dominated loop between loops used in LHS and RHS.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e. an upper bound of) the exact backedge-taken count.
APInt getSignedRangeMax(const SCEV *S)
Determine the max of the signed range for a particular SCEV.
LLVMContext & getContext() const
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout structure.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
Represents an op.with.overflow intrinsic.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
std::optional< APInt > SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, unsigned RangeWidth)
Let q(n) = An^2 + Bn + C, and BW = bit width of the value range (e.g. 32 for i32).
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
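A few concrete data points for the APInt helpers above (a sketch; the expected results follow from the stated semantics, e.g. q(n) = n^2 + n - 6 has roots 2 and -3, so the smallest solution with n >= 0 should be 2):

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <optional>
using namespace llvm;

static void apintOpsDemo() {
  APInt A(32, 12);
  APInt B(32, -8, /*isSigned=*/true);
  assert(APIntOps::smax(A, B) == 12); // signed: 12 > -8
  assert(APIntOps::umax(A, B) == B);  // unsigned: 0xFFFFFFF8 > 12
  assert(APIntOps::GreatestCommonDivisor(APInt(32, 12), APInt(32, 8)) == 4);
  std::optional<APInt> N = APIntOps::SolveQuadraticEquationWrap(
      APInt(32, 1), APInt(32, 1), APInt(32, -6, /*isSigned=*/true),
      /*RangeWidth=*/32);
  assert(N && *N == 2);
}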
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
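A sketch of the matcher combinators above as client code usually composes them; both helpers are hypothetical:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;

// Recognize "shl %x, C" and capture both pieces.
static bool matchShlByConst(Value *V, Value *&X, const APInt *&ShAmt) {
  return match(V, m_Shl(m_Value(X), m_APInt(ShAmt)));
}

// Recognize a select whose true arm is a constant integer.
static bool matchSelectConstArm(Value *V, Value *&Cond, const APInt *&C) {
  return match(V, m_Select(m_Value(Cond), m_APInt(C), m_Value()));
}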
cst_pred_ty< is_all_ones > m_scev_AllOnes()
Match an integer with all bits set.
SCEVUnaryExpr_match< SCEVZeroExtendExpr, Op0_t > m_scev_ZExt(const Op0_t &Op0)
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
SCEVUnaryExpr_match< SCEVSignExtendExpr, Op0_t > m_scev_SExt(const Op0_t &Op0)
cst_pred_ty< is_zero > m_scev_Zero()
Match an integer 0.
bind_ty< const SCEVConstant > m_SCEVConstant(const SCEVConstant *&V)
bind_ty< const SCEV > m_SCEV(const SCEV *&V)
Match a SCEV, capturing it if we match.
SCEVBinaryExpr_match< SCEVAddExpr, Op0_t, Op1_t > m_scev_Add(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
bind_ty< const SCEVUnknown > m_SCEVUnknown(const SCEVUnknown *&V)
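The SCEV-level matchers above compose the same way. A hypothetical sketch matching 1 + zext(X); the constant is written as the first add operand because SCEV canonicalization sorts constants to the front, and these matchers do not try commuted operand orders:

#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
using namespace llvm;
using namespace SCEVPatternMatch;

static bool matchOnePlusZExt(const SCEV *S, const SCEV *&X) {
  return match(S, m_scev_Add(m_scev_One(), m_scev_ZExt(m_SCEV(X))));
}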
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
bool mustTriggerUB(const Instruction *I, const SmallPtrSetImpl< const Value * > &KnownPoison)
Return true if the given instruction must trigger undefined behavior when I is executed with any oper...
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if it's even possible to fold a call to the specified function.
bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)
Check a function for errors, useful for use when debugging a pass.
const_succ_range successors(const BasicBlock *BB)
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A is a subset of B.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
unsigned short computeExpressionSize(ArrayRef< const SCEV * > Args)
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, const DominatorTree &DT)
Returns true if the arithmetic part of WO's result is used only along the paths control dependent on the computation not overflowing, WO being an <op>.with.overflow intrinsic.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start, Entry], [Inc, Backedge]; Inc = binop iv, Step (or binop Step, iv).
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void initializeScalarEvolutionWrapperPassPass(PassRegistry &)
auto reverse(ContainerTy &&C)
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
bool programUndefinedIfPoison(const Instruction *Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isPointerTy(const Type *T)
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
bool propagatesPoison(const Use &PoisonOp)
Return true if PoisonOp's user yields poison or raises UB if its operand PoisonOp is poison.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
const_pred_range predecessors(const BasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return the number of times the sign bit of the register is replicated into the other bits.
iterator_range< df_iterator< T > > depth_first(const T &G)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
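A sketch of SCEVExprContains with a lambda predicate; the helper name is hypothetical:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Does the expression mention any add recurrence anywhere in its tree?
static bool mentionsAddRec(const SCEV *S) {
  return SCEVExprContains(
      S, [](const SCEV *X) { return isa<SCEVAddRecExpr>(X); });
}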
Implement std::hash so that hash_code can be used in STL containers.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A special type used by analysis passes to provide an address that identifies that particular analysis...
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
unsigned getBitWidth() const
Get the bit width of this value.
static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
KnownBits zextOrTrunc(unsigned BitWidth) const
Return known bits for a zero extension or truncation of the value we're tracking.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
bool isNegative() const
Returns true if this value is known to be negative.
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
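A small concrete check of the KnownBits transfer functions above (a sketch; fully-known inputs should stay fully known through shl):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

static void knownBitsDemo() {
  KnownBits Five = KnownBits::makeConstant(APInt(8, 5)); // 0b00000101
  KnownBits One = KnownBits::makeConstant(APInt(8, 1));
  KnownBits Sh = KnownBits::shl(Five, One);              // 5 << 1
  assert(Sh.getMinValue() == Sh.getMaxValue());          // still a constant
  assert(Sh.getMaxValue() == 10 && Sh.isNonNegative());
}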
An object of this class is returned by queries that could not be answered.
static bool classof(const SCEV *S)
Methods for support type inquiry through isa, cast, and dyn_cast:
This class defines a simple visitor class that may be used for various SCEV analysis purposes.
A utility class that uses RAII to save and restore the value of a variable.
Information about the number of loop iterations for which a loop exit's branch condition evaluates to the not-taken path.
ExitLimit(const SCEV *E)
Construct either an exact exit limit from a constant, or an unknown one from a SCEVCouldNotCompute.
const SCEV * ExactNotTaken
const SCEV * SymbolicMaxNotTaken
SmallVector< const SCEVPredicate *, 4 > Predicates
A vector of predicate guards for this ExitLimit.
const SCEV * ConstantMaxNotTaken