1//===- InductiveRangeCheckElimination.cpp - -------------------------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// The InductiveRangeCheckElimination pass splits a loop's iteration space into 10// three disjoint ranges. It does that in a way such that the loop running in 11// the middle loop provably does not need range checks. As an example, it will 14// len = < known positive > 15// for (i = 0; i < n; i++) { 16// if (0 <= i && i < len) { 19// throw_out_of_bounds(); 25// len = < known positive > 26// limit = smin(n, len) 28// for (i = 0; i < limit; i++) { 29// if (0 <= i && i < len) { // this check is fully redundant 32// throw_out_of_bounds(); 35// for (i = limit; i < n; i++) { 36// if (0 <= i && i < len) { 39// throw_out_of_bounds(); 43//===----------------------------------------------------------------------===// 116cl::desc(
"If set to true, IRCE may eliminate wide range checks in loops " 117"with narrow latch condition."));
122"Maximum size of range check type for which can be produced runtime " 123"overflow check of its limit's computation"));
129#define DEBUG_TYPE "irce" 133/// An inductive range check is conditional branch in a loop with a condition 134/// that is provably true for some contiguous range of values taken by the 135/// containing loop's induction variable. 137classInductiveRangeCheck {
139constSCEV *Begin =
nullptr;
140constSCEV *Step =
nullptr;
142Use *CheckUse =
nullptr;
158staticbool reassociateSubLHS(
Loop *L,
Value *VariantLHS,
Value *InvariantRHS,
163constSCEV *getBegin()
const{
return Begin; }
164constSCEV *getStep()
const{
return Step; }
165constSCEV *getEnd()
const{
returnEnd; }
168OS <<
"InductiveRangeCheck:\n";
176 getCheckUse()->getUser()->print(
OS);
177OS <<
" Operand: " << getCheckUse()->getOperandNo() <<
"\n";
185Use *getCheckUse()
const{
return CheckUse; }
187 /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If 188 /// R.getEnd() le R.getBegin(), then R denotes the empty range. 200constSCEV *getBegin()
const{
return Begin; }
201constSCEV *getEnd()
const{
returnEnd; }
212 /// This is the value the condition of the branch needs to evaluate to for the 213 /// branch to take the hot successor (see (1) above). 214bool getPassingDirection() {
returntrue; }
216 /// Computes a range for the induction variable (IndVar) in which the range 217 /// check is redundant and can be constant-folded away. The induction 218 /// variable is not required to be the canonical {0,+,1} induction variable. 221bool IsLatchSigned)
const;
223 /// Parse out a set of inductive range checks from \p BI and append them to \p 226 /// NB! There may be conditions feeding into \p BI that aren't inductive range 227 /// checks, and hence don't end up in \p Checks. 228staticvoid extractRangeChecksFromBranch(
230 std::optional<uint64_t> EstimatedTripCount,
234classInductiveRangeCheckElimination {
244// Returns the estimated number of iterations based on block frequency info if 245// available, or on branch probability info. Nullopt is returned if the number 246// of iterations cannot be estimated. 247 std::optional<uint64_t> estimatedTripCount(
constLoop &L);
252LoopInfo &LI, GetBFIFunc GetBFI = std::nullopt)
253 : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
258}
// end anonymous namespace 260/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot 261/// be interpreted as a range check, return false. Otherwise set `Index` to the 262/// SCEV being range checked, and set `End` to the upper or lower limit `Index` 263/// is being range checked. 264bool InductiveRangeCheck::parseRangeCheckICmp(
Loop *L,
ICmpInst *ICI,
268auto IsLoopInvariant = [&SE,
L](
Value *
V) {
279// Canonicalize to the `Index Pred Invariant` comparison 280if (IsLoopInvariant(LHS)) {
283 }
elseif (!IsLoopInvariant(RHS))
284// Both LHS and RHS are loop variant 287if (parseIvAgaisntLimit(L, LHS, RHS, Pred, SE, Index,
End))
290if (reassociateSubLHS(L, LHS, RHS, Pred, SE, Index,
End))
293// TODO: support ReassociateAddLHS 297// Try to parse range check in the form of "IV vs Limit" 298bool InductiveRangeCheck::parseIvAgaisntLimit(
Loop *L,
Value *LHS,
Value *RHS,
304auto SIntMaxSCEV = [&](
Type *
T) {
305unsignedBitWidth = cast<IntegerType>(
T)->getBitWidth();
309constauto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(LHS));
313// We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". 314// We can potentially do much better here. 315// If we want to adjust upper bound for the unsigned range check as we do it 316// for signed one, we will need to pick Unsigned max 321case ICmpInst::ICMP_SGE:
322if (
match(RHS, m_ConstantInt<0>())) {
329case ICmpInst::ICMP_SGT:
330if (
match(RHS, m_ConstantInt<-1>())) {
337case ICmpInst::ICMP_SLT:
338case ICmpInst::ICMP_ULT:
343case ICmpInst::ICMP_SLE:
344case ICmpInst::ICMP_ULE:
347boolSigned = Pred == ICmpInst::ICMP_SLE;
359// Try to parse range check in the form of "IV - Offset vs Limit" or "Offset - 361bool InductiveRangeCheck::reassociateSubLHS(
372bool OffsetSubtracted =
false;
374// "Offset - IV vs Limit" 377// "IV - Offset vs Limit" 378 OffsetSubtracted =
true;
382constauto *AddRec = dyn_cast<SCEVAddRecExpr>(
IV);
386// In order to turn "IV - Offset < Limit" into "IV < Limit + Offset", we need 387// to be able to freely move values from left side of inequality to right side 388// (just as in normal linear arithmetics). Overflows make things much more 389// complicated, so we want to avoid this. 391// Let's prove that the initial subtraction doesn't overflow with all IV's 392// values from the safe range constructed for that check. 394// [Case 1] IV - Offset < Limit 395// It doesn't overflow if: 396// SINT_MIN <= IV - Offset <= SINT_MAX 397// In terms of scaled SINT we need to prove: 398// SINT_MIN + Offset <= IV <= SINT_MAX + Offset 399// Safe range will be constructed: 400// 0 <= IV < Limit + Offset 401// It means that 'IV - Offset' doesn't underflow, because: 402// SINT_MIN + Offset < 0 <= IV 403// and doesn't overflow: 404// IV < Limit + Offset <= SINT_MAX + Offset 406// [Case 2] Offset - IV > Limit 407// It doesn't overflow if: 408// SINT_MIN <= Offset - IV <= SINT_MAX 409// In terms of scaled SINT we need to prove: 410// -SINT_MIN >= IV - Offset >= -SINT_MAX 411// Offset - SINT_MIN >= IV >= Offset - SINT_MAX 412// Safe range will be constructed: 413// 0 <= IV < Offset - Limit 414// It means that 'Offset - IV' doesn't underflow, because 415// Offset - SINT_MAX < 0 <= IV 416// and doesn't overflow: 417// IV < Offset - Limit <= Offset - SINT_MIN 419// For the computed upper boundary of the IV's range (Offset +/- Limit) we 420// don't know exactly whether it overflows or not. So if we can't prove this 421// fact at compile time, we scale boundary computations to a wider type with 422// the intention to add runtime overflow check. 432case Instruction::Add:
435case Instruction::Sub:
441 cast<Instruction>(VariantLHS)))
444// We couldn't prove that the expression does not overflow. 445// Than scale it to a wider type to check overflow at runtime. 457// "IV - Offset < Limit" -> "IV" < Offset + Limit 458 Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add,
Offset, Limit);
460// "Offset - IV > Limit" -> "IV" < Offset - Limit 461 Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Sub,
Offset, Limit);
462 Pred = ICmpInst::getSwappedPredicate(Pred);
465if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
466// "Expr <= Limit" -> "Expr < Limit + 1" 467if (Pred == ICmpInst::ICMP_SLE && Limit)
468 Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Limit,
479void InductiveRangeCheck::extractRangeChecksFromCond(
483Value *Condition = ConditionUse.
get();
484if (!Visited.
insert(Condition).second)
487// TODO: Do the same for OR, XOR, NOT etc? 489 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(0),
491 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(1),
496ICmpInst *ICI = dyn_cast<ICmpInst>(Condition);
502if (!parseRangeCheckICmp(L, ICI, SE, IndexAddRec,
End))
505assert(IndexAddRec &&
"IndexAddRec was not computed");
511 InductiveRangeCheck IRC;
513 IRC.Begin = IndexAddRec->
getStart();
515 IRC.CheckUse = &ConditionUse;
519void InductiveRangeCheck::extractRangeChecksFromBranch(
521 std::optional<uint64_t> EstimatedTripCount,
526unsigned IndexLoopSucc =
L->contains(BI->
getSuccessor(0)) ? 0 : 1;
528"No edges coming to loop?");
531auto SuccessProbability =
533if (EstimatedTripCount) {
534auto EstimatedEliminatedChecks =
535 SuccessProbability.
scale(*EstimatedTripCount);
537LLVM_DEBUG(
dbgs() <<
"irce: could not prove profitability for branch " 539 <<
"estimated eliminated checks too low " 540 << EstimatedEliminatedChecks <<
"\n";);
545if (SuccessProbability < LikelyTaken) {
546LLVM_DEBUG(
dbgs() <<
"irce: could not prove profitability for branch " 548 <<
"could not estimate trip count " 549 <<
"and branch success probability too low " 550 << SuccessProbability <<
"\n";);
556// IRCE expects branch's true edge comes to loop. Invert branch for opposite 558if (IndexLoopSucc != 0) {
567 InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->
getOperandUse(0),
571/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return 572/// signed or unsigned extension of \p S to type \p Ty. 578// Compute a safe set of limits for the main loop to run in -- effectively the 579// intersection of `Range' and the iteration space of the original loop. 580// Return std::nullopt if unable to compute the set of subranges. 581static std::optional<LoopConstrainer::SubRanges>
583 InductiveRangeCheck::Range &
Range,
585auto *RTy = cast<IntegerType>(
Range.getType());
586// We only support wide range checks and narrow latches. 595// I think we can be more aggressive here and make this nuw / nsw if the 596// addition that feeds into the icmp for the latch's terminating branch is nuw 597// / nsw. In any case, a wrapping 2's complement addition is safe. 599 RTy, SE, IsSignedPredicate);
601 SE, IsSignedPredicate);
605// We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or 606// [Smallest, GreatestSeen] is the range of values the induction variable 609constSCEV *Smallest =
nullptr, *Greatest =
nullptr, *GreatestSeen =
nullptr;
615// No overflow, because the range [Smallest, GreatestSeen] is not empty. 618// These two computations may sign-overflow. Here is why that is okay: 620// We know that the induction variable does not sign-overflow on any 621// iteration except the last one, and it starts at `Start` and ends at 622// `End`, decrementing by one every time. 624// * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the 625// induction variable is decreasing we know that the smallest value 626// the loop body is actually executed with is `INT_SMIN` == `Smallest`. 628// * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In 629// that case, `Clamp` will always return `Smallest` and 630// [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`) 631// will be an empty range. Returning an empty range is always safe. 635 GreatestSeen = Start;
638auto Clamp = [&SE, Smallest, Greatest, IsSignedPredicate](
constSCEV *S) {
639return IsSignedPredicate
644// In some cases we can prove that we don't need a pre or post loop. 646 IsSignedPredicate ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
648 IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
650bool ProvablyNoPreloop =
652if (!ProvablyNoPreloop)
653 Result.LowLimit = Clamp(
Range.getBegin());
655bool ProvablyNoPostLoop =
657if (!ProvablyNoPostLoop)
658 Result.HighLimit = Clamp(
Range.getEnd());
663/// Computes and returns a range of values for the induction variable (IndVar) 664/// in which the range check can be safely elided. If it cannot compute such a 665/// range, returns std::nullopt. 666std::optional<InductiveRangeCheck::Range>
669bool IsLatchSigned)
const{
670// We can deal when types of latch check and range checks don't match in case 671// if latch check is more narrow. 672auto *IVType = dyn_cast<IntegerType>(IndVar->
getType());
673auto *RCType = dyn_cast<IntegerType>(getBegin()->
getType());
674auto *EndType = dyn_cast<IntegerType>(getEnd()->
getType());
675// Do not work with pointer types. 676if (!IVType || !RCType)
678if (IVType->getBitWidth() > RCType->getBitWidth())
681// IndVar is of the form "A + B * I" (where "I" is the canonical induction 682// variable, that may or may not exist as a real llvm::Value in the loop) and 683// this inductive range check is a range check on the "C + D * I" ("C" is 684// getBegin() and "D" is getStep()). We rewrite the value being range 685// checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA". 687// The actual inequalities we solve are of the form 689// 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1) 691// Here L stands for upper limit of the safe iteration space. 692// The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid 693// overflows when calculating (0 - M) and (L - M) we, depending on type of 694// IV's iteration space, limit the calculations by borders of the iteration 695// space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0. 696// If we figured out that "anything greater than (-M) is safe", we strengthen 697// this to "everything greater than 0 is safe", assuming that values between 698// -M and 0 just do not exist in unsigned iteration space, and we don't want 699// to deal with overflown values. 709assert(!
B->isZero() &&
"Recurrence with zero step?");
711constSCEV *
C = getBegin();
716assert(!
D->getValue()->isZero() &&
"Recurrence with zero step?");
717unsignedBitWidth = RCType->getBitWidth();
721// Subtract Y from X so that it does not go through border of the IV 722// iteration space. Mathematically, it is equivalent to: 724// ClampedSubtract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX). [1] 726// In [1], 'X - Y' is a mathematical subtraction (result is not bounded to 727// any width of bit grid). But after we take min/max, the result is 728// guaranteed to be within [INT_MIN, INT_MAX]. 730// In [1], INT_MAX and INT_MIN are respectively signed and unsigned max/min 731// values, depending on type of latch condition that defines IV iteration 733auto ClampedSubtract = [&](
constSCEV *
X,
constSCEV *
Y) {
734// FIXME: The current implementation assumes that X is in [0, SINT_MAX]. 735// This is required to ensure that SINT_MAX - X does not overflow signed and 736// that X - Y does not overflow unsigned if Y is negative. Can we lift this 737// restriction and make it work for negative X either? 739// X is a number from signed range, Y is interpreted as signed. 740// Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only 741// thing we should care about is that we didn't cross SINT_MAX. 742// So, if Y is positive, we subtract Y safely. 743// Rule 1: Y > 0 ---> Y. 744// If 0 <= -Y <= (SINT_MAX - X), we subtract Y safely. 745// Rule 2: Y >=s (X - SINT_MAX) ---> Y. 746// If 0 <= (SINT_MAX - X) < -Y, we can only subtract (X - SINT_MAX). 747// Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX). 748// It gives us smax(Y, X - SINT_MAX) to subtract in all cases. 753// X is a number from unsigned range, Y is interpreted as signed. 754// Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only 755// thing we should care about is that we didn't cross zero. 756// So, if Y is negative, we subtract Y safely. 757// Rule 1: Y <s 0 ---> Y. 758// If 0 <= Y <= X, we subtract Y safely. 759// Rule 2: Y <=s X ---> Y. 760// If 0 <= X < Y, we should stop at 0 and can only subtract X. 761// Rule 3: Y >s X ---> X. 762// It gives us smin(X, Y) to subtract in all cases. 768// This function returns SCEV equal to 1 if X is non-negative 0 otherwise. 769auto SCEVCheckNonNegative = [&](
constSCEV *
X) {
773// Can we trivially prove that X is a non-negative or negative value? 778// If not, we will have to figure it out during the execution. 779// Function smax(smin(X, 0), -1) + 1 equals to 1 if X >= 0 and 0 if X < 0. 784// This function returns SCEV equal to 1 if X will not overflow in terms of 785// range check type, 0 otherwise. 786auto SCEVCheckWillNotOverflow = [&](
constSCEV *
X) {
787// X doesn't overflow if SINT_MAX >= X. 788// Then if (SINT_MAX - X) >= 0, X doesn't overflow 790constSCEV *OverflowCheck =
793// X doesn't underflow if X >= SINT_MIN. 794// Then if (X - SINT_MIN) >= 0, X doesn't underflow 796constSCEV *UnderflowCheck =
799return SE.
getMulExpr(OverflowCheck, UnderflowCheck);
802// FIXME: Current implementation of ClampedSubtract implicitly assumes that 803// X is non-negative (in sense of a signed value). We need to re-implement 804// this function in a way that it will correctly handle negative X as well. 805// We use it twice: for X = 0 everything is fine, but for X = getEnd() we can 806// end up with a negative X and produce wrong results. So currently we ensure 807// that if getEnd() is negative then both ends of the safe range are zero. 808// Note that this may pessimize elimination of unsigned range checks against 810constSCEV *REnd = getEnd();
811constSCEV *EndWillNotOverflow = SE.
getOne(RCType);
815OS <<
"irce: in function ";
816OS <<
L->getHeader()->getParent()->getName();
819OS <<
"there is range check with scaled boundary:\n";
823if (EndType->getBitWidth() > RCType->getBitWidth()) {
824assert(EndType->getBitWidth() == RCType->getBitWidth() * 2);
826 PrintRangeCheck(
errs());
827// End is computed with extended type but will be truncated to a narrow one 828// type of range check. Therefore we need a check that the result will not 829// overflow in terms of narrow type. 835constSCEV *RuntimeChecks =
836 SE.
getMulExpr(SCEVCheckNonNegative(REnd), EndWillNotOverflow);
837constSCEV *Begin = SE.
getMulExpr(ClampedSubtract(Zero, M), RuntimeChecks);
840return InductiveRangeCheck::Range(Begin,
End);
843static std::optional<InductiveRangeCheck::Range>
845const std::optional<InductiveRangeCheck::Range> &R1,
846const InductiveRangeCheck::Range &
R2) {
847if (
R2.isEmpty(SE,
/* IsSigned */true))
852// We never return empty ranges from this function, and R1 is supposed to be 853// a result of intersection. Thus, R1 is never empty. 854assert(!R1Value.isEmpty(SE,
/* IsSigned */true) &&
855"We should never have empty R1!");
857// TODO: we could widen the smaller range and have this work; but for now we 858// bail out to keep things simple. 859if (R1Value.getType() !=
R2.getType())
865// If the resulting range is empty, just return std::nullopt. 866auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
867if (Ret.isEmpty(SE,
/* IsSigned */true))
872static std::optional<InductiveRangeCheck::Range>
874const std::optional<InductiveRangeCheck::Range> &R1,
875const InductiveRangeCheck::Range &
R2) {
876if (
R2.isEmpty(SE,
/* IsSigned */false))
881// We never return empty ranges from this function, and R1 is supposed to be 882// a result of intersection. Thus, R1 is never empty. 883assert(!R1Value.isEmpty(SE,
/* IsSigned */false) &&
884"We should never have empty R1!");
886// TODO: we could widen the smaller range and have this work; but for now we 887// bail out to keep things simple. 888if (R1Value.getType() !=
R2.getType())
894// If the resulting range is empty, just return std::nullopt. 895auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
896if (Ret.isEmpty(SE,
/* IsSigned */false))
904// There are no loops in the function. Return before computing other expensive 911// Get BFI analysis result on demand. Please note that modification of 912// CFG invalidates this analysis and we should handle it. 916 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
920bool CFGChanged =
false;
921for (
constauto &L : LI) {
922 CFGChanged |=
simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
923/*PreserveLCSSA=*/false);
926 Changed |= CFGChanged;
937auto LPMAddNewLoop = [&Worklist](
Loop *NL,
bool IsSubloop) {
942while (!Worklist.
empty()) {
944if (IRCE.run(L, LPMAddNewLoop)) {
959std::optional<uint64_t>
960InductiveRangeCheckElimination::estimatedTripCount(
constLoop &L) {
963uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
964uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
965if (phFreq == 0 || hFreq == 0)
967return {hFreq / phFreq};
973auto *Latch =
L.getLoopLatch();
976auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
980auto LatchBrExitIdx = LatchBr->getSuccessor(0) ==
L.getHeader() ? 1 : 0;
989bool InductiveRangeCheckElimination::run(
992LLVM_DEBUG(
dbgs() <<
"irce: giving up constraining loop, too large\n");
1002auto EstimatedTripCount = estimatedTripCount(*L);
1006 <<
"the estimated number of iterations is " 1007 << *EstimatedTripCount <<
"\n");
1015for (
auto *BBI :
L->getBlocks())
1016if (
BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
1017 InductiveRangeCheck::extractRangeChecksFromBranch(
1018 TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);
1020if (RangeChecks.
empty())
1024OS <<
"irce: looking at loop ";
L->print(
OS);
1025OS <<
"irce: loop has " << RangeChecks.
size()
1026 <<
" inductive range checks: \n";
1027for (InductiveRangeCheck &IRC : RangeChecks)
1034 PrintRecognizedRangeChecks(
errs());
1036constchar *FailureReason =
nullptr;
1037 std::optional<LoopStructure> MaybeLoopStructure =
1040if (!MaybeLoopStructure) {
1042 << FailureReason <<
"\n";);
1049 std::optional<InductiveRangeCheck::Range> SafeIterRange;
1052// Basing on the type of latch predicate, we interpret the IV iteration range 1053// as signed or unsigned range. We use different min/max functions (signed or 1054// unsigned) when intersecting this range with safe iteration ranges implied 1056auto IntersectRange =
1059for (InductiveRangeCheck &IRC : RangeChecks) {
1060autoResult = IRC.computeSafeIterationSpace(SE, IndVar,
1061LS.IsSignedPredicate);
1063auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, *Result);
1064if (MaybeSafeIterRange) {
1065assert(!MaybeSafeIterRange->isEmpty(SE,
LS.IsSignedPredicate) &&
1066"We should never return empty ranges!");
1068 SafeIterRange = *MaybeSafeIterRange;
1076 std::optional<LoopConstrainer::SubRanges> MaybeSR =
1084 SafeIterRange->getBegin()->getType(), *MaybeSR);
1089auto PrintConstrainedLoopInfo = [
L]() {
1090dbgs() <<
"irce: in function ";
1091dbgs() <<
L->getHeader()->getParent()->getName() <<
": ";
1092dbgs() <<
"constrained ";
1099 PrintConstrainedLoopInfo();
1101// Optimize away the now-redundant range checks. 1103for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
1104ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
1107 IRC.getCheckUse()->set(FoldedRangeCheck);
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static const SCEV * NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE, bool Signed)
If the type of S matches with Ty, return S.
static cl::opt< bool > PrintRangeChecks("irce-print-range-checks", cl::Hidden, cl::init(false))
static cl::opt< bool > AllowUnsignedLatchCondition("irce-allow-unsigned-latch", cl::Hidden, cl::init(true))
static cl::opt< unsigned > LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, cl::init(64))
static std::optional< InductiveRangeCheck::Range > IntersectSignedRange(ScalarEvolution &SE, const std::optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
static cl::opt< bool > AllowNarrowLatchCondition("irce-allow-narrow-latch", cl::Hidden, cl::init(true), cl::desc("If set to true, IRCE may eliminate wide range checks in loops " "with narrow latch condition."))
static cl::opt< unsigned > MaxTypeSizeForOverflowCheck("irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32), cl::desc("Maximum size of range check type for which can be produced runtime " "overflow check of its limit's computation"))
static cl::opt< unsigned > MinEliminatedChecks("irce-min-eliminated-checks", cl::Hidden, cl::init(10))
static cl::opt< bool > PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false))
static std::optional< InductiveRangeCheck::Range > IntersectUnsignedRange(ScalarEvolution &SE, const std::optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
static cl::opt< bool > SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false))
static std::optional< LoopConstrainer::SubRanges > calculateSubRanges(ScalarEvolution &SE, const Loop &L, InductiveRangeCheck::Range &Range, const LoopStructure &MainLoopStructure)
static cl::opt< bool > PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks", cl::Hidden, cl::init(false))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
This header provides classes for managing per-loop analyses.
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PowerPC Reduce CR logical Operation
This file provides a priority worklist.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
static const uint32_t IV[8]
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
LLVMContext & getContext() const
Get the context in which this basic block lives.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
uint64_t scaleByInverse(uint64_t Num) const
Scale a large integer by the inverse.
uint64_t scale(uint64_t Num) const
Scale a large integer.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getPredicate() const
Return the predicate for this instruction.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
This is an important class for using LLVM in a threaded context.
Analysis pass that exposes the LoopInfo for a function.
This class is used to constrain loops to run within a given iteration space.
Represents a single loop in the control flow graph.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void abandon()
Mark an analysis as abandoned.
bool empty() const
Determine if the PriorityWorklist is empty or not.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
This class represents an analyzed expression in the program.
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Is operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed)?...
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
constexpr unsigned BitWidth
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
IntegerType * ExitCountTy
static std::optional< LoopStructure > parseLoopStructure(ScalarEvolution &, Loop &, bool, const char *&)