1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This file defines common loop utility functions. 11//===----------------------------------------------------------------------===// 52#define DEBUG_TYPE "loop-utils" 62// We re-use a vector for the in-loop predecessors. 67"Must start with an empty predecessors list!");
70// See if there are any non-loop predecessors of this exit block and 71// keep track of the in-loop predecessors. 72bool IsDedicatedExit =
true;
74if (L->contains(PredBB)) {
75if (isa<IndirectBrInst>(PredBB->getTerminator()))
76// We cannot rewrite exiting edges from an indirectbr. 81 IsDedicatedExit =
false;
84assert(!InLoopPredecessors.
empty() &&
"Must have *some* loop predecessor!");
86// Nothing to do if this is already a dedicated exit. 91 BB, InLoopPredecessors,
".loopexit", DT, LI, MSSAU, PreserveLCSSA);
95dbgs() <<
"WARNING: Can't create a dedicated exit block for loop: " 99 << NewExitBB->getName() <<
"\n");
103// Walk the exit blocks directly rather than building up a data structure for 104// them, but only visit each one once. 106for (
auto *BB : L->blocks())
108// We're looking for exit blocks so skip in-loop successors. 109if (L->contains(SuccBB))
112// Visit each exit block exactly once. 113if (!Visited.
insert(SuccBB).second)
116 Changed |= RewriteExit(SuccBB);
122/// Returns the instructions that use values defined in the loop. 126for (
auto *
Block : L->getBlocks())
127// FIXME: I believe that this could use copy_if if the Inst reference could 128// be adapted into a pointer. 129for (
auto &Inst : *
Block) {
130autoUsers = Inst.users();
132auto *
Use = cast<Instruction>(U);
133return !L->contains(
Use->getParent());
142// By definition, all loop passes need the LoopInfo analysis and the 143// Dominator tree it depends on. Because they all participate in the loop 144// pass manager, they must also preserve these. 150// We must also preserve LoopSimplify and LCSSA. We locally access their IDs 151// here because users shouldn't directly get them from this header. 158// This is used in the LPPassManager to perform LCSSA verification on passes 159// which preserve lcssa form 163// Loop passes are designed to run inside of a loop pass manager which means 164// that any function analyses they require must be required by the first loop 165// pass in the manager (so that it is computed before the loop pass manager 166// runs) and preserved by all loop pasess in the manager. To make this 167// reasonably robust, the set needed for most loop passes is maintained here. 168// If your loop pass requires an analysis not listed here, you will need to 169// carefully audit the loop pass manager nesting structure that results. 177// FIXME: When all loop passes preserve MemorySSA, it can be required and 178// preserved here instead of the individual handling in each pass. 181/// Manually defined generic "LoopPass" dependency initialization. This is used 182/// to initialize the exact set of passes from above in \c 183/// getLoopAnalysisUsage. It can be used within a loop pass's initialization 186/// INITIALIZE_PASS_DEPENDENCY(LoopPass) 188/// As-if "LoopPass" were a pass. 202/// Create MDNode for input string. 211/// Set input string into loop metadata by keeping other values intact. 212/// If the string is already in loop metadata update value if it is 217// If the loop already has metadata, retain it. 222// If it is of form key = value, try to parse it. 223if (Node->getNumOperands() == 2) {
224MDString *S = dyn_cast<MDString>(Node->getOperand(0));
227 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
229// It is already in place. Do nothing. 231// We need to update the value, so just skip it here and it will 232// be added after copying other existed nodes. 241// Replace current metadata node with new one. 244// Set operand 0 to refer to the loop id itself. 249std::optional<ElementCount>
251 std::optional<int> Width =
256 TheLoop,
"llvm.loop.vectorize.scalable.enable");
265constchar *InheritOptionsExceptPrefix,
bool AlwaysNew) {
274bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275bool InheritSomeAttrs =
276 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] !=
'\0';
281if (InheritAllAttrs || InheritSomeAttrs) {
283MDNode *
Op = cast<MDNode>(Existing.get());
285auto InheritThisAttribute = [InheritSomeAttrs,
286 InheritOptionsExceptPrefix](
MDNode *
Op) {
287if (!InheritSomeAttrs)
290// Skip malformatted attribute metadata nodes. 294if (!isa<MDString>(NameMD))
296StringRef AttrName = cast<MDString>(NameMD)->getString();
298// Do not inherit excluded attributes. 299return !AttrName.
starts_with(InheritOptionsExceptPrefix);
302if (InheritThisAttribute(
Op))
308// Modified if we dropped at least one attribute. 312bool HasAnyFollowup =
false;
313for (
StringRef OptionName : FollowupOptions) {
318 HasAnyFollowup =
true;
325// Attributes of the followup loop not specified explicitly, so signal to the 326// transformation pass to add suitable attributes. 327if (!AlwaysNew && !HasAnyFollowup)
330// If no attributes were added or removed, the previous loop Id can be reused. 331if (!AlwaysNew && !Changed)
334// No attributes is equivalent to having no !llvm.loop metadata at all. 338// Build the new loop ID. 341return FollowupLoopID;
356 std::optional<int> Count =
377 std::optional<int> Count =
392 std::optional<bool>
Enable =
398 std::optional<ElementCount> VectorizeWidth =
400 std::optional<int> InterleaveCount =
403// 'Forcing' vector width and interleave count to one effectively disables 404// this transformation. 405if (
Enable ==
true && VectorizeWidth && VectorizeWidth->isScalar() &&
406 InterleaveCount == 1)
415if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
418if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
447/// Does a BFS from a given node to all of its children inside a given loop. 448/// The returned vector of basic blocks includes the starting point. 454// Only include subregions in the top level loop. 460 AddRegionToWorklist(
N);
462for (
size_tI = 0;
I < Worklist.
size();
I++) {
464 AddRegionToWorklist(Child);
472assert(LatchIdx != -1 &&
"LatchBlock is not a case in this PHINode");
476if (U !=
Cond && U != IncV)
returnfalse;
479if (U !=
Cond && U != PN)
returnfalse;
486assert((!DT || L->isLCSSAForm(*DT)) &&
"Expected LCSSA!");
487auto *Preheader = L->getLoopPreheader();
488assert(Preheader &&
"Preheader should exist!");
490 std::unique_ptr<MemorySSAUpdater> MSSAU;
492 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
494// Now that we know the removal is safe, remove the loop by changing the 495// branch from the preheader to go to the single exit block. 497// Because we're deleting a large chunk of code at once, the sequence in which 498// we remove things is very important to avoid invalidation issues. 500// Tell ScalarEvolution that the loop is deleted. Do this before 501// deleting the loop so that ScalarEvolution can look at the loop 502// to determine what it needs to clean up. 510"Preheader must end with a side-effect-free terminator");
512"Preheader must have a single successor");
513// Connect the preheader to the exit block. Keep the old edge to the header 514// around to perform the dominator tree update in two separate steps 515// -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 516// preheader -> header. 519// 0. Preheader 1. Preheader 2. Preheader 522// Header <--\ | Header <--\ | Header <--\ 523 // | | | | | | | | | | | 524// | V | | | V | | | V | 525// | Body --/ | | Body --/ | | Body --/ 529// By doing this is two separate steps we can perform the dominator tree 530// update without using the batch update API. 532// Even when the loop is never executed, we cannot remove the edge from the 533// source block to the exit block. Consider the case where the unexecuted loop 534// branches back to an outer loop. If we deleted the loop and removed the edge 535// coming to this inner loop, this will break the outer loop structure (by 536// deleting the backedge of the outer loop). If the outer loop is indeed a 537// non-loop, it will be deleted in a future iteration of loop deletion pass. 540auto *ExitBlock = L->getUniqueExitBlock();
543assert(ExitBlock &&
"Should have a unique exit block!");
544assert(L->hasDedicatedExits() &&
"Loop should have dedicated exits!");
547// Remove the old branch. The conditional branch becomes a new terminator. 550// Rewrite phis in the exit block to get their inputs from the Preheader 551// instead of the exiting block. 552for (
PHINode &
P : ExitBlock->phis()) {
553// Set the zero'th element of Phi to be from the preheader and remove all 554// other incoming values. Given the loop has dedicated exits, all other 555// incoming values must be from the exiting blocks. 557P.setIncomingBlock(PredIndex, Preheader);
558// Removes all incoming values from all other exiting blocks (including 559// duplicate values from an exiting block). 560// Nuke all entries except the zero'th entry which is the preheader entry. 561P.removeIncomingValueIf([](
unsignedIdx) {
returnIdx != 0; },
562/* DeletePHIIfEmpty */false);
564assert((
P.getNumIncomingValues() == 1 &&
565P.getIncomingBlock(PredIndex) == Preheader) &&
566"Should have exactly one value and that's from the preheader!");
570 DTU.
applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
572 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
579// Disconnect the loop body by branching directly to its exit. 582// Remove the old branch. 585assert(L->hasNoExitBlocks() &&
586"Loop should have either zero or one exit blocks.");
594 DTU.
applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
596 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
600 MSSAU->removeBlocks(DeadBlockSet);
606// Use a map to unique and a vector to guarantee deterministic ordering. 612// Given LCSSA form is satisfied, we should not have users of instructions 613// within the dead loop outside of the loop. However, LCSSA doesn't take 614// unreachable uses into account. We handle them here. 615// We could do it after drop all references (in this case all users in the 616// loop will be already eliminated and we have less work to do but according 617// to API doc of User::dropAllReferences only valid operation after dropping 618// references, is deletion. So let's substitute all usages of 619// instruction from the loop with poison value of corresponding type first. 620for (
auto *
Block : L->blocks())
624if (
auto *Usr = dyn_cast<Instruction>(U.getUser()))
625if (L->contains(Usr->getParent()))
627// If we have a DT then we can check that uses outside a loop only in 631"Unexpected user in reachable block");
635// RemoveDIs: do the same as below for DbgVariableRecords. 636if (
Block->IsNewDbgInfoFormat) {
640 DVR.getDebugLoc().get());
641if (!DeadDebugSet.
insert(Key).second)
643// Unlinks the DVR from its container, for later insertion. 644 DVR.removeFromParent();
649// For one of each variable encountered, preserve a debug intrinsic (set 650// to Poison) and transfer it to the loop exit. This terminates any 651// variable locations that were set during the loop. 652auto *DVI = dyn_cast<DbgVariableIntrinsic>(&
I);
660// After the loop has been deleted all the values defined and modified 661// inside the loop are going to be unavailable. Values computed in the 662// loop will have been deleted, automatically causing their debug uses 663// be replaced with undef. Loop invariant values will still be available. 664// Move dbg.values out of the loop so that earlier location ranges are still 665// terminated and loop invariant assignments are preserved.
669assert(InsertDbgValueBefore != ExitBlock->end() &&
670"There should be a non-PHI instruction in exit block, else these " 671"instructions will have no parent.");
673for (
auto *DVI : DeadDebugInst)
674 DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
676// Due to the "head" bit in BasicBlock::iterator, we're going to insert 677// each DbgVariableRecord right at the start of the block, whereas dbg.values 678// would be repeatedly inserted before the first instruction. To replicate 679// this behaviour, do it backwards. 681 ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);
684// Remove the block from the reference counting scheme, so that we can 685// delete it freely later. 686for (
auto *
Block : L->blocks())
687Block->dropAllReferences();
693// Erase the instructions and the blocks without having to worry 694// about ordering because we already dropped the references. 695// NOTE: This iteration is safe because erasing the block does not remove 696// its entry from the loop's block list. We do that in the next section. 698 BB->eraseFromParent();
700// Finally, the blocks from loopinfo. This has to happen late because 701// otherwise our loop iterators won't work. 704blocks.insert(L->block_begin(), L->block_end());
708// The last step is to update LoopInfo now that we've eliminated this loop. 709// Note: LoopInfo::erase remove the given loop and relink its subloops with 710// its parent. While removeLoop/removeChildLoop remove the given loop but 711// not relink its subloops, which is what we want. 712if (
Loop *ParentLoop = L->getParentLoop()) {
714assert(
I != ParentLoop->end() &&
"Couldn't find loop");
715 ParentLoop->removeChildLoop(
I);
727auto *Latch = L->getLoopLatch();
728assert(Latch &&
"multiple latches not yet supported");
729auto *Header = L->getHeader();
730Loop *OutermostLoop = L->getOutermostLoop();
735 std::unique_ptr<MemorySSAUpdater> MSSAU;
737 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
739// Update the CFG and domtree. We chose to special case a couple of 740// of common cases for code quality and test readability reasons. 742if (
auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
743if (!BI->isConditional()) {
750// Conditional latch/exit - note that latch can be shared by inner 751// and outer loop so the other target doesn't need to be an exit 752if (L->isLoopExiting(Latch)) {
753// TODO: Generalize ConstantFoldTerminator so that it can be used 754// here without invalidating LCSSA or MemorySSA. (Tricky case for 755// LCSSA: header is an exit block of a preceding sibling loop w/o 757constunsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
758BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
761 Header->removePredecessor(Latch,
true);
764auto *NewBI = Builder.
CreateBr(ExitBB);
765// Transfer the metadata to the new branch instruction (minus the 766// loop info since this is no longer a loop) 768 LLVMContext::MD_annotation});
770 BI->eraseFromParent();
771 DTU.
applyUpdates({{DominatorTree::Delete, Latch, Header}});
773 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
778// General case. By splitting the backedge, and then explicitly making it 779// unreachable we gracefully handle corner cases such as switch and invoke 781auto *BackedgeBB =
SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
785/*PreserveLCSSA*/true, &DTU, MSSAU.get());
788// Erase (and destroy) this loop instance. Handles relinking sub-loops 789// and blocks within the loop as needed. 792// If the loop we broke had a parent, then changeToUnreachable might have 793// caused a block to be removed from the parent loop (see loop_nest_lcssa 794// test case in zero-btc.ll for an example), thus changing the parent's 795// exit blocks. If that happened, we need to rebuild LCSSA on the outermost 796// loop which might have a had a block removed. 797if (OutermostLoop != L)
802/// Checks if \p L has an exiting latch branch. There may also be other 803/// exiting blocks. Returns branch instruction terminating the loop 804/// latch if above check is successful, nullptr otherwise. 811if (!LatchBR || LatchBR->
getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
816"At least one edge out of the latch must go to the header");
821/// Return the estimated trip count for any exiting branch which dominates 826// To estimate the number of times the loop body was executed, we want to 827// know the number of times the backedge was taken, vs. the number of times 828// we exited the loop. 837// Don't have a way to return predicated infinite 840 OrigExitWeight = ExitWeight;
842// Estimated exit count is a ratio of the loop weight by the weight of the 843// edge exiting the loop, rounded to nearest. 845// Estimated trip count is one plus estimated exit count. 849std::optional<unsigned>
851unsigned *EstimatedLoopInvocationWeight) {
852// Currently we take the estimate exit count only from the loop latch, 853// ignoring other exiting blocks. This can overestimate the trip count 854// if we exit through another exit, but can never underestimate it. 855// TODO: incorporate information from other exits 858if (std::optional<uint64_t> EstTripCount =
860if (EstimatedLoopInvocationWeight)
861 *EstimatedLoopInvocationWeight = ExitWeight;
869unsigned EstimatedloopInvocationWeight) {
870// At the moment, we currently support changing the estimate trip count of 871// the latch branch only. We could extend this API to manipulate estimated 872// trip counts for any exit. 877// Calculate taken and exit weights. 878unsigned LatchExitWeight = 0;
879unsigned BackedgeTakenWeight = 0;
881if (EstimatedTripCount > 0) {
882 LatchExitWeight = EstimatedloopInvocationWeight;
883 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
886// Make a swap if back edge is taken when condition is "false". 888std::swap(BackedgeTakenWeight, LatchExitWeight);
892// Set/Update profile metadata. 894 LLVMContext::MD_prof,
906// Get the backedge taken count for the inner loop 909if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
913// Get whether count is invariant to the outer loop 927return Intrinsic::vector_reduce_add;
929return Intrinsic::vector_reduce_mul;
931return Intrinsic::vector_reduce_and;
933return Intrinsic::vector_reduce_or;
935return Intrinsic::vector_reduce_xor;
936case RecurKind::FMulAdd:
938return Intrinsic::vector_reduce_fadd;
940return Intrinsic::vector_reduce_fmul;
942return Intrinsic::vector_reduce_smax;
944return Intrinsic::vector_reduce_smin;
946return Intrinsic::vector_reduce_umax;
948return Intrinsic::vector_reduce_umin;
950return Intrinsic::vector_reduce_fmax;
952return Intrinsic::vector_reduce_fmin;
953case RecurKind::FMaximum:
954return Intrinsic::vector_reduce_fmaximum;
955case RecurKind::FMinimum:
956return Intrinsic::vector_reduce_fminimum;
962case Intrinsic::vector_reduce_fadd:
963return Instruction::FAdd;
964case Intrinsic::vector_reduce_fmul:
965return Instruction::FMul;
966case Intrinsic::vector_reduce_add:
967return Instruction::Add;
968case Intrinsic::vector_reduce_mul:
969return Instruction::Mul;
970case Intrinsic::vector_reduce_and:
971return Instruction::And;
972case Intrinsic::vector_reduce_or:
973return Instruction::Or;
974case Intrinsic::vector_reduce_xor:
975return Instruction::Xor;
976case Intrinsic::vector_reduce_smax:
977case Intrinsic::vector_reduce_smin:
978case Intrinsic::vector_reduce_umax:
979case Intrinsic::vector_reduce_umin:
980return Instruction::ICmp;
981case Intrinsic::vector_reduce_fmax:
982case Intrinsic::vector_reduce_fmin:
983return Instruction::FCmp;
993case Intrinsic::vector_reduce_umin:
994return Intrinsic::umin;
995case Intrinsic::vector_reduce_umax:
996return Intrinsic::umax;
997case Intrinsic::vector_reduce_smin:
998return Intrinsic::smin;
999case Intrinsic::vector_reduce_smax:
1000return Intrinsic::smax;
1001case Intrinsic::vector_reduce_fmin:
1002return Intrinsic::minnum;
1003case Intrinsic::vector_reduce_fmax:
1004return Intrinsic::maxnum;
1005case Intrinsic::vector_reduce_fminimum:
1006return Intrinsic::minimum;
1007case Intrinsic::vector_reduce_fmaximum:
1008return Intrinsic::maximum;
1016case RecurKind::UMin:
1017return Intrinsic::umin;
1018case RecurKind::UMax:
1019return Intrinsic::umax;
1020case RecurKind::SMin:
1021return Intrinsic::smin;
1022case RecurKind::SMax:
1023return Intrinsic::smax;
1024case RecurKind::FMin:
1025return Intrinsic::minnum;
1026case RecurKind::FMax:
1027return Intrinsic::maxnum;
1028case RecurKind::FMinimum:
1029return Intrinsic::minimum;
1030case RecurKind::FMaximum:
1031return Intrinsic::maximum;
1037case Intrinsic::vector_reduce_smax:
1038return RecurKind::SMax;
1039case Intrinsic::vector_reduce_smin:
1040return RecurKind::SMin;
1041case Intrinsic::vector_reduce_umax:
1042return RecurKind::UMax;
1043case Intrinsic::vector_reduce_umin:
1044return RecurKind::UMin;
1045case Intrinsic::vector_reduce_fmax:
1046return RecurKind::FMax;
1047case Intrinsic::vector_reduce_fmin:
1048return RecurKind::FMin;
1050return RecurKind::None;
1058case RecurKind::UMin:
1060case RecurKind::UMax:
1062case RecurKind::SMin:
1064case RecurKind::SMax:
1066case RecurKind::FMin:
1068case RecurKind::FMax:
1070// We do not add FMinimum/FMaximum recurrence kind here since there is no 1071// equivalent predicate which compares signed zeroes according to the 1072// semantics of the intrinsics (llvm.minimum/maximum). 1080 (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
1081// TODO: Add float minnum/maxnum support when FMF nnan is set. 1092// Helper to generate an ordered reduction. 1095unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1097// Extract and apply reduction ops in ascending order: 1098// e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] 1100for (
unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
1104if (
Op != Instruction::ICmp &&
Op != Instruction::FCmp) {
1117// Helper to generate a log2 shuffle reduction. 1122unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1123// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1124// and vector ops, reducing the set of values being computed by half each 1127"Reduction emission only supported for pow2 vectors!");
1128// Note: fast-math flags are controlled by the builder configuration 1129// and are assumed to apply to all generated arithmetic instructions. Other 1130// poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part 1131// of the builder configuration, and since they're not passed explicitly, 1132// will never be relevant here. Note that it would be generally unsound to 1133// propagate these from an intrinsic call to the expansion anyways as we 1134// change the order of operations. 1135auto BuildShuffledOp = [&Builder, &
Op,
1137Value *&TmpVec) ->
void {
1139if (
Op != Instruction::ICmp &&
Op != Instruction::FCmp) {
1150if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1152for (
unsigned stride = 1; stride < VF; stride <<= 1) {
1153// Initialise the mask with undef. 1154 std::fill(ShuffleMask.
begin(), ShuffleMask.
end(), -1);
1155for (
unsigned j = 0; j < VF; j += stride << 1) {
1156 ShuffleMask[j] = j + stride;
1158 BuildShuffledOp(ShuffleMask, TmpVec);
1162for (
unsigned i = VF; i != 1; i >>= 1) {
1163// Move the upper half of the vector to the lower half. 1164for (
unsigned j = 0; j != i / 2; ++j)
1165 ShuffleMask[j] = i / 2 + j;
1167// Fill the rest of the mask with undef. 1168 std::fill(&ShuffleMask[i / 2], ShuffleMask.
end(), -1);
1169 BuildShuffledOp(ShuffleMask, TmpVec);
1172// The result is in the first element of the vector. 1181"Unexpected reduction kind");
1182Value *InitVal =
Desc.getRecurrenceStartValue();
1183Value *NewVal =
nullptr;
1185// First use the original phi to determine the new value we're trying to 1186// select from in the loop. 1188for (
auto *U : OrigPhi->
users()) {
1189if ((SI = dyn_cast<SelectInst>(U)))
1192assert(SI &&
"One user of the original phi should be a select");
1194if (SI->getTrueValue() == OrigPhi)
1195 NewVal = SI->getFalseValue();
1197assert(SI->getFalseValue() == OrigPhi &&
1198"At least one input to the select should be the original Phi");
1199 NewVal = SI->getTrueValue();
1202// If any predicate is true it means that we want to select the new value. 1204 Src->getType()->isVectorTy() ? Builder.
CreateOrReduce(Src) : Src;
1205// The compares in the loop may yield poison, which propagates through the 1206// bitwise ORs. Freeze it here before the condition is used. 1208return Builder.
CreateSelect(AnyOf, NewVal, InitVal,
"rdx.select");
1214Desc.getRecurrenceKind()) &&
1215"Unexpected reduction kind");
1216Value *StartVal =
Desc.getRecurrenceStartValue();
1218Value *MaxRdx = Src->getType()->isVectorTy()
1221// Correct the final reduction result back to the start value if the maximum 1222// reduction is sentinel value. 1225return Builder.
CreateSelect(Cmp, MaxRdx, StartVal,
"rdx.select");
1230bool Negative =
false;
1234case Intrinsic::vector_reduce_add:
1235case Intrinsic::vector_reduce_mul:
1236case Intrinsic::vector_reduce_or:
1237case Intrinsic::vector_reduce_xor:
1238case Intrinsic::vector_reduce_and:
1239case Intrinsic::vector_reduce_fadd:
1240case Intrinsic::vector_reduce_fmul: {
1243 Flags.noSignedZeros());
1245case Intrinsic::vector_reduce_umax:
1246case Intrinsic::vector_reduce_umin:
1247case Intrinsic::vector_reduce_smin:
1248case Intrinsic::vector_reduce_smax: {
1252case Intrinsic::vector_reduce_fmax:
1253case Intrinsic::vector_reduce_fmaximum:
1256case Intrinsic::vector_reduce_fmin:
1257case Intrinsic::vector_reduce_fminimum: {
1258bool PropagatesNaN = RdxID == Intrinsic::vector_reduce_fminimum ||
1259 RdxID == Intrinsic::vector_reduce_fmaximum;
1261return (!Flags.noNaNs() && !PropagatesNaN)
1271assert((!(K == RecurKind::FMin || K == RecurKind::FMax) ||
1273"nnan, nsz is expected to be set for FP min/max reduction.");
1280auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1281auto getIdentity = [&]() {
1291case RecurKind::SMax:
1292case RecurKind::SMin:
1293case RecurKind::UMax:
1294case RecurKind::UMin:
1295case RecurKind::FMax:
1296case RecurKind::FMin:
1297case RecurKind::FMinimum:
1298case RecurKind::FMaximum:
1300case RecurKind::FMulAdd:
1301case RecurKind::FAdd:
1303case RecurKind::FMul:
1314"AnyOf reduction is not supported.");
1316auto *SrcTy = cast<VectorType>(Src->getType());
1317Type *SrcEltTy = SrcTy->getElementType();
1319Value *Ops[] = {Iden, Src};
1326// TODO: Support in-order reductions based on the recurrence descriptor. 1327// All ops in the reduction inherit fast-math-flags from the recurrence 1330B.setFastMathFlags(
Desc.getFastMathFlags());
1344assert((
Desc.getRecurrenceKind() == RecurKind::FAdd ||
1345Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1346"Unexpected reduction kind");
1347assert(Src->getType()->isVectorTy() &&
"Expected a vector type");
1348assert(!Start->getType()->isVectorTy() &&
"Expected a scalar type");
1350returnB.CreateFAddReduce(Start, Src);
1356assert((
Desc.getRecurrenceKind() == RecurKind::FAdd ||
1357Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1358"Unexpected reduction kind");
1359assert(Src->getType()->isVectorTy() &&
"Expected a vector type");
1360assert(!Start->getType()->isVectorTy() &&
"Expected a scalar type");
1363auto *SrcTy = cast<VectorType>(Src->getType());
1364Value *Ops[] = {Start, Src};
1369bool IncludeWrapFlags) {
1370auto *VecOp = dyn_cast<Instruction>(
I);
1373auto *Intersection = (OpValue ==
nullptr) ? dyn_cast<Instruction>(VL[0])
1374 : dyn_cast<Instruction>(OpValue);
1377constunsigned Opcode = Intersection->getOpcode();
1378 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1380auto *Instr = dyn_cast<Instruction>(V);
1383if (OpValue ==
nullptr || Opcode == Instr->getOpcode())
1384 VecOp->andIRFlags(V);
1421auto Predicate =
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1432auto Predicate =
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1438//===----------------------------------------------------------------------===// 1439// rewriteLoopExitValues - Optimize IV users outside the loop. 1440// As a side effect, reduces the amount of IV processing within the loop. 1441//===----------------------------------------------------------------------===// 1448while (!WorkList.
empty()) {
1450// This use is outside the loop, nothing to do. 1451if (!L->contains(Curr))
1453// Do we assume it is a "hard" use which will not be eliminated easily? 1456// Otherwise, add all its users to worklist. 1457for (
constauto *U : Curr->
users()) {
1458auto *UI = cast<Instruction>(U);
1459if (Visited.
insert(UI).second)
1466// Collect information about PHI nodes which can be transformed in 1467// rewriteLoopExitValues. 1470unsignedIth;
// For which incoming value? 1481// Check whether it is possible to delete the loop after rewriting exit 1482// value. If it is possible, ignore ReplaceExitValue and do rewriting 1485BasicBlock *Preheader = L->getLoopPreheader();
1486// If there is no preheader, the loop will not be deleted. 1490// In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. 1491// We obviate multiple ExitingBlocks case for simplicity. 1492// TODO: If we see testcase with multiple ExitingBlocks can be deleted 1493// after exit value rewriting, we can enhance the logic here. 1495 L->getExitingBlocks(ExitingBlocks);
1497 L->getUniqueExitBlocks(ExitBlocks);
1498if (ExitBlocks.
size() != 1 || ExitingBlocks.
size() != 1)
1503while (
PHINode *
P = dyn_cast<PHINode>(BI)) {
1506// If the Incoming value of P is found in RewritePhiSet, we know it 1507// could be rewritten to use a loop invariant value in transformation 1508// phase later. Skip it in the loop invariant check below. 1511unsigned i = Phi.Ith;
1512if (Phi.PN ==
P && (Phi.PN)->getIncomingValue(i) ==
Incoming) {
1519if (!found && (
I = dyn_cast<Instruction>(
Incoming)))
1520if (!L->hasLoopInvariantOperands(
I))
1526for (
auto *BB : L->blocks())
1528returnI.mayHaveSideEffects();
1535/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi, 1536/// and returns true if this Phi is an induction phi in the loop. When 1537/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI. 1542if (!L->getLoopPreheader())
1544if (Phi->getParent() != L->getHeader())
1555// Check a pre-condition. 1556assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1557"Indvars did not preserve LCSSA!");
1560 L->getUniqueExitBlocks(ExitBlocks);
1563// Find all values that are computed inside the loop, but used outside of it. 1564// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 1565// the exit blocks of the loop to find them. 1567// If there are no PHI nodes in this exit block, then no values defined 1568// inside the loop are used on this path, skip it. 1569PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1574// Iterate over all of the PHI nodes. 1576while ((PN = dyn_cast<PHINode>(BBI++))) {
1578continue;
// dead use, don't replace it 1583// Iterate over all of the values in all the PHI nodes. 1584for (
unsigned i = 0; i != NumPreds; ++i) {
1585// If the value being merged in is not integer or is not defined 1586// in the loop, skip it. 1588if (!isa<Instruction>(InVal))
1591// If this pred is for a subloop, not L itself, skip it. 1593continue;
// The Block is in a subloop, skip it. 1595// Check that InVal is defined in the loop. 1597if (!L->contains(Inst))
1600// Find exit values which are induction variables in the loop, and are 1601// unused in the loop, with the only use being the exit block PhiNode, 1602// and the induction variable update binary operator. 1603// The exit value can be replaced with the final value when it is cheap 1607PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1611// This is an induction PHI. Check that the only users are PHI 1612// nodes, and induction variable update binary operators. 1614 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1616 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1617 if (B && B != ID.getInductionBinOp())
1623// If it is not an induction phi, it must be an induction update 1624// binary operator with an induction phi user. 1629 PHINode *Phi = dyn_cast<PHINode>(U);
1630 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1635if (
B !=
ID.getInductionBinOp())
1640// Okay, this instruction has a user outside of the current loop 1641// and varies predictably *inside* the loop. Evaluate the value it 1642// contains when the loop exits, if possible. We prefer to start with 1643// expressions which are true for all exits (so as to maximize 1644// expression reuse by the SCEVExpander), but resort to per-exit 1645// evaluation if that fails. 1647if (isa<SCEVCouldNotCompute>(ExitValue) ||
1649 !
Rewriter.isSafeToExpand(ExitValue)) {
1650// TODO: This should probably be sunk into SCEV in some way; maybe a 1651// getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for 1652// most SCEV expressions and other recurrence types (e.g. shift 1653// recurrences). Is there existing code we can reuse? 1655if (isa<SCEVCouldNotCompute>(ExitCount))
1657if (
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(Inst)))
1658if (AddRec->getLoop() == L)
1659 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1660if (isa<SCEVCouldNotCompute>(ExitValue) ||
1662 !
Rewriter.isSafeToExpand(ExitValue))
1666// Computing the value outside of the loop brings no benefit if it is 1667// definitely used inside the loop in a way which can not be optimized 1668// away. Avoid doing so unless we know we have a value which computes 1669// the ExitValue already. TODO: This should be merged into SCEV 1670// expander to leverage its knowledge of existing expressions. 1675// Check if expansions of this SCEV would count as being high cost. 1676bool HighCost =
Rewriter.isHighCostExpansion(
1679// Note that we must not perform expansions until after 1680// we query *all* the costs, because if we perform temporary expansion 1681// inbetween, one that we might not intend to keep, said expansion 1682// *may* affect cost calculation of the next SCEV's we'll query, 1683// and next SCEV may errneously get smaller cost. 1685// Collect all the candidate PHINodes to be rewritten. 1687 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1688 &*Inst->
getParent()->getFirstInsertionPt() : Inst;
1689 RewritePhiSet.
emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1694// TODO: evaluate whether it is beneficial to change how we calculate 1695// high-cost: if we have SCEV 'A' which we know we will expand, should we 1696// calculate the cost of other SCEV's after expanding SCEV 'A', thus 1697// potentially giving cost bonus to those other SCEV's? 1706// Only do the rewrite when the ExitValue can be expanded cheaply. 1707// If LoopCanBeDel is true, rewrite exit value aggressively. 1710 !LoopCanBeDel && Phi.HighCost)
1714 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1716LLVM_DEBUG(
dbgs() <<
"rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1718 <<
" LoopVal = " << *(Phi.ExpansionPoint) <<
"\n");
1721// If we reuse an instruction from a loop which is neither L nor one of 1722// its containing loops, we end up breaking LCSSA form for this loop by 1723// creating a new use of its instruction. 1724if (
auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1725if (
auto *EVL = LI->
getLoopFor(ExitInsn->getParent()))
1727assert(EVL->contains(L) &&
"LCSSA breach detected!");
1733// It's necessary to tell ScalarEvolution about this explicitly so that 1734// it can walk the def-use list and forget all SCEVs, as it may not be 1735// watching the PHI itself. Once the new exit value is in place, there 1736// may not be a def-use connection between the loop and every instruction 1737// which got a SCEVAddRecExpr for that loop. 1740// If this instruction is dead now, delete it. Don't do it now to avoid 1741// invalidating iterators. 1745// Replace PN with ExitVal if that is legal and does not break LCSSA. 1753// The insertion point instruction may have been deleted; clear it out 1754// so that the rewriter doesn't trip over it later. 1759/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for 1763assert(UF > 0 &&
"Zero unrolled factor is not supported");
1764assert(UnrolledLoop != RemainderLoop &&
1765"Unrolled and Remainder loops are expected to distinct");
1767// Get number of iterations in the original scalar loop. 1768unsigned OrigLoopInvocationWeight = 0;
1769 std::optional<unsigned> OrigAverageTripCount =
1771if (!OrigAverageTripCount)
1774// Calculate number of iterations in unrolled loop. 1775unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1776// Calculate number of iterations for remainder loop. 1777unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1780 OrigLoopInvocationWeight);
1782 OrigLoopInvocationWeight);
1785/// Utility that implements appending of loops onto a worklist. 1786/// Loops are added in preorder (analogous for reverse postorder for trees), 1787/// and the worklist is processed LIFO. 1788template <
typename RangeT>
1791// We use an internal worklist to build up the preorder traversal without 1795// We walk the initial sequence of loops in reverse because we generally want 1796// to visit defs before uses and the worklist is LIFO. 1798assert(PreOrderLoops.
empty() &&
"Must start with an empty preorder walk.");
1800"Must start with an empty preorder walk worklist.");
1804 PreOrderWorklist.
append(L->begin(), L->end());
1806 }
while (!PreOrderWorklist.
empty());
1808 Worklist.
insert(std::move(PreOrderLoops));
1809 PreOrderLoops.
clear();
1813template <
typename RangeT>
1819templatevoid llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1823llvm::appendLoopsToWorklist<Loop &>(
Loop &L,
1835 PL->addChildLoop(&New);
1842// Add all of the blocks in L to the new loop. 1845 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1847// Add all of the subloops to the new loop. 1854/// IR Values for the lower and upper bounds of a pointer evolution. We 1855/// need to use value-handles because SCEV expansion can invalidate previously 1856/// expanded values. Thus expansion of a pointer can invalidate the bounds for 1864/// Expand code for the lower and upper bound of the pointer group \p CG 1865/// in \p TheLoop. \return the values for the bounds. 1872Value *Start =
nullptr, *
End =
nullptr;
1876// If the Low and High values are themselves loop-variant, then we may want 1877// to expand the range to include those covered by the outer loop as well. 1878// There is a trade-off here with the advantage being that creating checks 1879// using the expanded range permits the runtime memory checks to be hoisted 1880// out of the outer loop. This reduces the cost of entering the inner loop, 1881// which can be significant for low trip counts. The disadvantage is that 1882// there is a chance we may now never enter the vectorized inner loop, 1883// whereas using a restricted range check could have allowed us to enter at 1884// least once. This is why the behaviour is not currently the default and is 1885// controlled by the parameter 'HoistRuntimeChecks'. 1887 isa<SCEVAddRecExpr>(
High) && isa<SCEVAddRecExpr>(
Low)) {
1888auto *HighAR = cast<SCEVAddRecExpr>(
High);
1889auto *LowAR = cast<SCEVAddRecExpr>(
Low);
1892constSCEV *Recur = LowAR->getStepRecurrence(SE);
1893if (Recur == HighAR->getStepRecurrence(SE) &&
1894 HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1897if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
1900 cast<SCEVAddRecExpr>(
High)->evaluateAtIteration(OuterExitCount, SE);
1901if (!isa<SCEVCouldNotCompute>(NewHigh)) {
1902LLVM_DEBUG(
dbgs() <<
"LAA: Expanded RT check for range to include " 1903"outer loop in order to permit hoisting\n");
1905Low = cast<SCEVAddRecExpr>(
Low)->getStart();
1906// If there is a possibility that the stride is negative then we have 1907// to generate extra checks to ensure the stride is positive. 1920 Start = Exp.expandCodeFor(
Low, PtrArithTy, Loc);
1921End = Exp.expandCodeFor(
High, PtrArithTy, Loc);
1924 Start = Builder.
CreateFreeze(Start, Start->getName() +
".fr");
1928 Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) :
nullptr;
1930return {Start,
End, StrideVal};
1933/// Turns a collection of checks into a collection of expanded upper and 1934/// lower bounds for both pointers in the check. 1940// Here we're relying on the SCEV Expander's cache to only emit code for the 1942transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1948return std::make_pair(
First, Second);
1951return ChecksWithBounds;
1958// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. 1959// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible 1960auto ExpandedChecks =
1966// Our instructions might fold to a constant. 1967Value *MemoryRuntimeCheck =
nullptr;
1969for (
constauto &[
A,
B] : ExpandedChecks) {
1970// Check if two pointers (A and B) conflict where conflict is computed as: 1971// start(A) <= end(B) && start(B) <= end(A) 1973assert((
A.Start->getType()->getPointerAddressSpace() ==
1974B.End->getType()->getPointerAddressSpace()) &&
1975 (
B.Start->getType()->getPointerAddressSpace() ==
1976A.End->getType()->getPointerAddressSpace()) &&
1977"Trying to bounds check pointers with different address spaces");
1979// [A|B].Start points to the first accessed byte under base [A|B]. 1980// [A|B].End points to the last accessed byte, plus one. 1981// There is no conflict when the intervals are disjoint: 1982// NoConflict = (B.Start >= A.End) || (A.Start >= B.End) 1984// bound0 = (B.Start < A.End) 1985// bound1 = (A.Start < B.End) 1986// IsConflict = bound0 & bound1 1989Value *IsConflict = ChkBuilder.
CreateAnd(Cmp0, Cmp1,
"found.conflict");
1990if (
A.StrideToCheck) {
1992A.StrideToCheck, ConstantInt::get(
A.StrideToCheck->getType(), 0),
1994 IsConflict = ChkBuilder.
CreateOr(IsConflict, IsNegativeStride);
1996if (
B.StrideToCheck) {
1998B.StrideToCheck, ConstantInt::get(
B.StrideToCheck->getType(), 0),
2000 IsConflict = ChkBuilder.
CreateOr(IsConflict, IsNegativeStride);
2002if (MemoryRuntimeCheck) {
2004 ChkBuilder.
CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
2006 MemoryRuntimeCheck = IsConflict;
2009return MemoryRuntimeCheck;
2019// Our instructions might fold to a constant. 2020Value *MemoryRuntimeCheck =
nullptr;
2022auto &SE = *Expander.
getSE();
2023// Map to keep track of created compares, The key is the pair of operands for 2024// the compare, to allow detecting and re-using redundant compares. 2026for (
constauto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
2027Type *Ty = SinkStart->getType();
2028// Compute VF * IC * AccessSize. 2029auto *VFTimesUFTimesSize =
2031 ConstantInt::get(Ty, IC * AccessSize));
2033 Expander.
expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
2035// Check if the same compare has already been created earlier. In that case, 2036// there is no need to check it again. 2037Value *IsConflict = SeenCompares.
lookup({Diff, VFTimesUFTimesSize});
2042 ChkBuilder.
CreateICmpULT(Diff, VFTimesUFTimesSize,
"diff.check");
2043 SeenCompares.
insert({{Diff, VFTimesUFTimesSize}, IsConflict});
2047if (MemoryRuntimeCheck) {
2049 ChkBuilder.
CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
2051 MemoryRuntimeCheck = IsConflict;
2054return MemoryRuntimeCheck;
2057std::optional<IVConditionInfo>
2060auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
2061if (!TI || !TI->isConditional())
2064auto *CondI = dyn_cast<Instruction>(TI->getCondition());
2065// The case with the condition outside the loop should already be handled 2067// Allow CmpInst and TruncInsts as they may be users of load instructions 2068// and have potential for partial unswitching 2069if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI))
2076 WorkList.
append(CondI->op_begin(), CondI->op_end());
2080while (!WorkList.
empty()) {
2082if (!
I || !L.contains(
I))
2085// TODO: support additional instructions. 2086if (!isa<LoadInst>(
I) && !isa<GetElementPtrInst>(
I))
2089// Do not duplicate volatile and atomic loads. 2090if (
auto *LI = dyn_cast<LoadInst>(
I))
2091if (LI->isVolatile() || LI->isAtomic())
2096if (
auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
2097// Queue the defining access to check for alias checks. 2098 AccessesToCheck.
push_back(MemUse->getDefiningAccess());
2101// MemoryDefs may clobber the location or may be atomic memory 2102// operations. Bail out. 2106 WorkList.
append(
I->op_begin(),
I->op_end());
2109if (InstToDuplicate.
empty())
2113 L.getExitingBlocks(ExitingBlocks);
2114auto HasNoClobbersOnPath =
2115 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
2118 -> std::optional<IVConditionInfo> {
2120// First, collect all blocks in the loop that are on a patch from Succ 2130while (!WorkList.
empty()) {
2132if (!L.contains(Current))
2134constauto &SeenIns = Seen.
insert(Current);
2139 *Current, [](
Instruction &
I) {
return !
I.mayHaveSideEffects(); });
2143// Require at least 2 blocks on a path through the loop. This skips 2144// paths that directly exit the loop. 2148// Next, check if there are any MemoryDefs that are on the path through 2149// the loop (in the Seen set) and they may-alias any of the locations in 2150// AccessedLocs. If that is the case, they may modify the condition and 2151// partial unswitching is not possible. 2153while (!AccessesToCheck.
empty()) {
2155auto SeenI = SeenAccesses.
insert(Current);
2159// Bail out if exceeded the threshold. 2163// MemoryUse are read-only accesses. 2164if (isa<MemoryUse>(Current))
2167// For a MemoryDef, check if is aliases any of the location feeding 2168// the original condition. 2169if (
auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
2178 AccessesToCheck.
push_back(cast<MemoryAccess>(U.getUser()));
2181// We could also allow loops with known trip counts without mustprogress, 2182// but ScalarEvolution may not be available. 2185// If the path is considered a no-op so far, check if it reaches a 2186// single exit block without any phis. This ensures no values from the 2187// loop are used outside of the loop. 2188if (
Info.PathIsNoop) {
2189for (
auto *Exiting : ExitingBlocks) {
2193if (L.contains(Succ))
2196Info.PathIsNoop &= Succ->phis().empty() &&
2197 (!
Info.ExitForPath ||
Info.ExitForPath == Succ);
2198if (!
Info.PathIsNoop)
2201"cannot have multiple exit blocks");
2202Info.ExitForPath = Succ;
2206if (!
Info.ExitForPath)
2207Info.PathIsNoop =
false;
2209Info.InstToDuplicate = InstToDuplicate;
2213// If we branch to the same successor, partial unswitching will not be 2215if (TI->getSuccessor(0) == TI->getSuccessor(1))
2218if (
autoInfo = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
2223if (
autoInfo = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
AMDGPU Register Bank Select
This is the interface for LLVM's primary stateless and local alias analysis.
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static std::optional< uint64_t > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
static const char * LLVMLoopDisableLICM
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
static const char * LLVMLoopDisableNonforced
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define INITIALIZE_PASS_DEPENDENCY(depName)
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Virtual Register Rewriter
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Class for arbitrary precision integers.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
static ConstantAsMetadata * get(Constant *C)
static Constant * getIntrinsicIdentity(Intrinsic::ID, Type *Ty)
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static Constant * getInfinity(Type *Ty, bool Negative=false)
static Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
iterator_range< iterator > children()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
UnreachableInst * CreateUnreachable()
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
FastMathFlags getFastMathFlags() const
Get the flags to be applied to created floating point ops.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
ConstantInt * getFalse()
Get the constant value for i1 false.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
std::vector< Loop * >::const_iterator iterator
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
StringRef getString() const
static MDString * get(LLVMContext &Context, StringRef Str)
BasicBlock * getBlock() const
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Root of the metadata hierarchy.
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
ScalarEvolution * getSE()
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
The instances of the Type class are immutable: once they are created, they are never changed.
const fltSemantics & getFltSemantics() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
Value * createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Emit a VP reduction intrinsic call for recurrence kind.
std::pair< iterator, bool > insert(const ValueT &V)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander, bool HoistRuntimeChecks=false)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
auto successors(const MachineBasicBlock *BB)
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Value * getReductionIdentity(Intrinsic::ID RdxID, Type *Ty, FastMathFlags FMF)
Given information about an @llvm.vector.reduce.
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
SmallVector< BasicBlock *, 16 > collectChildrenInLoop(DominatorTree *DT, DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::IAnyOf or RecurKind...
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
TransformationMode hasVectorizeTransformation(const Loop *L)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
auto reverse(ContainerTy &&C)
constexpr Intrinsic::ID getReductionIntrinsicID(RecurKind RK)
Returns the llvm.vector.reduce intrinsic that corresponds to the recurrence kind.
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
cl::opt< unsigned > SCEVCheapExpansionBudget
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
TransformationMode hasUnrollTransformation(const Loop *L)
TransformationMode hasDistributeTransformation(const Loop *L)
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
TransformationMode hasLICMVersioningTransformation(const Loop *L)
bool VerifyMemorySSA
Enables verification of MemorySSA.
Value * createFindLastIVReduction(IRBuilderBase &B, Value *Src, const RecurrenceDescriptor &Desc)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::IFindLastIV or Recu...
TransformationMode
The mode sets how eager a transformation should be applied.
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
@ TM_SuppressedByUser
The transformation must not be applied.
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
@ TM_Disable
The transformation should not be applied.
@ TM_Enable
The transformation should be applied without considering a cost model.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
RecurKind
These are the kinds of recurrences that we support.
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
DWARFExpression::Operation Op
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
constexpr unsigned BitWidth
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
auto predecessors(const MachineBasicBlock *BB)
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
IR Values for the lower and upper bounds of a pointer evolution.
TrackingVH< Value > Start
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
const SCEV * ExpansionSCEV
Instruction * ExpansionPoint
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.