Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
17#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/Analysis/LoopInfo.h"
20#include "llvm/Analysis/OptimizationRemarkEmitter.h"
21#include "llvm/Analysis/ScalarEvolutionExpressions.h"
22#include "llvm/Analysis/TargetLibraryInfo.h"
23#include "llvm/Analysis/TargetTransformInfo.h"
24#include "llvm/Analysis/ValueTracking.h"
25#include "llvm/Analysis/VectorUtils.h"
26#include "llvm/IR/IntrinsicInst.h"
27#include "llvm/IR/PatternMatch.h"
28#include "llvm/Transforms/Utils/SizeOpts.h"
29#include "llvm/Transforms/Vectorize/LoopVectorize.h"
30
31using namespacellvm;
32using namespacePatternMatch;
33
34#define LV_NAME "loop-vectorize"
35#define DEBUG_TYPE LV_NAME
36
37staticcl::opt<bool>
38EnableIfConversion("enable-if-conversion",cl::init(true),cl::Hidden,
39cl::desc("Enable if-conversion during vectorization."));
40
41staticcl::opt<bool>
42AllowStridedPointerIVs("lv-strided-pointer-ivs",cl::init(false),cl::Hidden,
43cl::desc("Enable recognition of non-constant strided "
44"pointer induction variables."));
45
46namespacellvm {
47cl::opt<bool>
48HintsAllowReordering("hints-allow-reordering",cl::init(true),cl::Hidden,
49cl::desc("Allow enabling loop hints to reorder "
50"FP operations during vectorization."));
51}// namespace llvm
52
53// TODO: Move size-based thresholds out of legality checking, make cost based
54// decisions instead of hard thresholds.
55staticcl::opt<unsigned>VectorizeSCEVCheckThreshold(
56"vectorize-scev-check-threshold",cl::init(16),cl::Hidden,
57cl::desc("The maximum number of SCEV checks allowed."));
58
59staticcl::opt<unsigned>PragmaVectorizeSCEVCheckThreshold(
60"pragma-vectorize-scev-check-threshold",cl::init(128),cl::Hidden,
61cl::desc("The maximum number of SCEV checks allowed with a "
62"vectorize(enable) pragma"));
63
64staticcl::opt<LoopVectorizeHints::ScalableForceKind>
65ForceScalableVectorization(
66"scalable-vectorization",cl::init(LoopVectorizeHints::SK_Unspecified),
67cl::Hidden,
68cl::desc("Control whether the compiler can use scalable vectors to "
69"vectorize a loop"),
70cl::values(
71clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly,"off",
72"Scalable vectorization is disabled."),
73clEnumValN(
74LoopVectorizeHints::SK_PreferScalable,"preferred",
75"Scalable vectorization is available and favored when the "
76"cost is inconclusive."),
77clEnumValN(
78LoopVectorizeHints::SK_PreferScalable,"on",
79"Scalable vectorization is available and favored when the "
80"cost is inconclusive.")));
81
82staticcl::opt<bool>EnableHistogramVectorization(
83"enable-histogram-loop-vectorization",cl::init(false),cl::Hidden,
84cl::desc("Enables autovectorization of some loops containing histograms"));
85
86/// Maximum vectorization interleave count.
87staticconstunsignedMaxInterleaveFactor = 16;
88
89namespacellvm {
90
91bool LoopVectorizeHints::Hint::validate(unsigned Val) {
92switch (Kind) {
93case HK_WIDTH:
94returnisPowerOf2_32(Val) && Val <=VectorizerParams::MaxVectorWidth;
95case HK_INTERLEAVE:
96returnisPowerOf2_32(Val) && Val <=MaxInterleaveFactor;
97case HK_FORCE:
98return (Val <= 1);
99case HK_ISVECTORIZED:
100case HK_PREDICATE:
101case HK_SCALABLE:
102return (Val == 0 || Val == 1);
103 }
104returnfalse;
105}
106
107LoopVectorizeHints::LoopVectorizeHints(constLoop *L,
108bool InterleaveOnlyWhenForced,
109OptimizationRemarkEmitter &ORE,
110constTargetTransformInfo *TTI)
111 : Width("vectorize.width",VectorizerParams::VectorizationFactor, HK_WIDTH),
112 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
113 Force("vectorize.enable", FK_Undefined, HK_FORCE),
114 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
115 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
116 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
117 TheLoop(L), ORE(ORE) {
118// Populate values with existing loop metadata.
119 getHintsFromMetadata();
120
121// force-vector-interleave overrides DisableInterleaving.
122if (VectorizerParams::isInterleaveForced())
123 Interleave.Value =VectorizerParams::VectorizationInterleave;
124
125// If the metadata doesn't explicitly specify whether to enable scalable
126// vectorization, then decide based on the following criteria (increasing
127// level of priority):
128// - Target default
129// - Metadata width
130// - Force option (always overrides)
131if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value ==SK_Unspecified) {
132if (TTI)
133 Scalable.Value =TTI->enableScalableVectorization() ?SK_PreferScalable
134 :SK_FixedWidthOnly;
135
136if (Width.Value)
137// If the width is set, but the metadata says nothing about the scalable
138// property, then assume it concerns only a fixed-width UserVF.
139// If width is not set, the flag takes precedence.
140 Scalable.Value =SK_FixedWidthOnly;
141 }
142
143// If the flag is set to force any use of scalable vectors, override the loop
144// hints.
145if (ForceScalableVectorization.getValue() !=
146LoopVectorizeHints::SK_Unspecified)
147 Scalable.Value =ForceScalableVectorization.getValue();
148
149// Scalable vectorization is disabled if no preference is specified.
150if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value ==SK_Unspecified)
151 Scalable.Value =SK_FixedWidthOnly;
152
153if (IsVectorized.Value != 1)
154// If the vectorization width and interleaving count are both 1 then
155// consider the loop to have been already vectorized because there's
156// nothing more that we can do.
157 IsVectorized.Value =
158getWidth() ==ElementCount::getFixed(1) &&getInterleave() == 1;
159LLVM_DEBUG(if (InterleaveOnlyWhenForced &&getInterleave() == 1)dbgs()
160 <<"LV: Interleaving disabled by the pass manager\n");
161}
162
163voidLoopVectorizeHints::setAlreadyVectorized() {
164LLVMContext &Context = TheLoop->getHeader()->getContext();
165
166MDNode *IsVectorizedMD =MDNode::get(
167 Context,
168 {MDString::get(Context,"llvm.loop.isvectorized"),
169ConstantAsMetadata::get(ConstantInt::get(Context,APInt(32, 1)))});
170MDNode *LoopID = TheLoop->getLoopID();
171MDNode *NewLoopID =
172makePostTransformationMetadata(Context, LoopID,
173 {Twine(Prefix(),"vectorize.").str(),
174Twine(Prefix(),"interleave.").str()},
175 {IsVectorizedMD});
176 TheLoop->setLoopID(NewLoopID);
177
178// Update internal cache.
179 IsVectorized.Value = 1;
180}
181
182boolLoopVectorizeHints::allowVectorization(
183Function *F,Loop *L,bool VectorizeOnlyWhenForced) const{
184if (getForce() ==LoopVectorizeHints::FK_Disabled) {
185LLVM_DEBUG(dbgs() <<"LV: Not vectorizing: #pragma vectorize disable.\n");
186emitRemarkWithHints();
187returnfalse;
188 }
189
190if (VectorizeOnlyWhenForced &&getForce() !=LoopVectorizeHints::FK_Enabled) {
191LLVM_DEBUG(dbgs() <<"LV: Not vectorizing: No #pragma vectorize enable.\n");
192emitRemarkWithHints();
193returnfalse;
194 }
195
196if (getIsVectorized() == 1) {
197LLVM_DEBUG(dbgs() <<"LV: Not vectorizing: Disabled/already vectorized.\n");
198// FIXME: Add interleave.disable metadata. This will allow
199// vectorize.disable to be used without disabling the pass and errors
200// to differentiate between disabled vectorization and a width of 1.
201 ORE.emit([&]() {
202returnOptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
203"AllDisabled", L->getStartLoc(),
204 L->getHeader())
205 <<"loop not vectorized: vectorization and interleaving are "
206"explicitly disabled, or the loop has already been "
207"vectorized";
208 });
209returnfalse;
210 }
211
212returntrue;
213}
214
215voidLoopVectorizeHints::emitRemarkWithHints() const{
216using namespaceore;
217
218 ORE.emit([&]() {
219if (Force.Value ==LoopVectorizeHints::FK_Disabled)
220returnOptimizationRemarkMissed(LV_NAME,"MissedExplicitlyDisabled",
221 TheLoop->getStartLoc(),
222 TheLoop->getHeader())
223 <<"loop not vectorized: vectorization is explicitly disabled";
224
225OptimizationRemarkMissed R(LV_NAME,"MissedDetails", TheLoop->getStartLoc(),
226 TheLoop->getHeader());
227 R <<"loop not vectorized";
228if (Force.Value ==LoopVectorizeHints::FK_Enabled) {
229 R <<" (Force=" << NV("Force", true);
230 if (Width.Value != 0)
231 R <<", Vector Width=" << NV("VectorWidth", getWidth());
232 if (getInterleave() != 0)
233 R <<", Interleave Count=" << NV("InterleaveCount", getInterleave());
234 R <<")";
235 }
236return R;
237 });
238}
239
240constchar *LoopVectorizeHints::vectorizeAnalysisPassName() const{
241if (getWidth() ==ElementCount::getFixed(1))
242returnLV_NAME;
243if (getForce() ==LoopVectorizeHints::FK_Disabled)
244returnLV_NAME;
245if (getForce() ==LoopVectorizeHints::FK_Undefined &&getWidth().isZero())
246returnLV_NAME;
247returnOptimizationRemarkAnalysis::AlwaysPrint;
248}
249
250boolLoopVectorizeHints::allowReordering() const{
251// Allow the vectorizer to change the order of operations if enabling
252// loop hints are provided
253ElementCount EC =getWidth();
254returnHintsAllowReordering &&
255 (getForce() ==LoopVectorizeHints::FK_Enabled ||
256 EC.getKnownMinValue() > 1);
257}
258
259void LoopVectorizeHints::getHintsFromMetadata() {
260MDNode *LoopID = TheLoop->getLoopID();
261if (!LoopID)
262return;
263
264// First operand should refer to the loop id itself.
265assert(LoopID->getNumOperands() > 0 &&"requires at least one operand");
266assert(LoopID->getOperand(0) == LoopID &&"invalid loop id");
267
268for (constMDOperand &MDO :llvm::drop_begin(LoopID->operands())) {
269constMDString *S =nullptr;
270SmallVector<Metadata *, 4> Args;
271
272// The expected hint is either a MDString or a MDNode with the first
273// operand a MDString.
274if (constMDNode *MD = dyn_cast<MDNode>(MDO)) {
275if (!MD || MD->getNumOperands() == 0)
276continue;
277 S = dyn_cast<MDString>(MD->getOperand(0));
278for (unsignedIdx = 1;Idx < MD->getNumOperands(); ++Idx)
279 Args.push_back(MD->getOperand(Idx));
280 }else {
281 S = dyn_cast<MDString>(MDO);
282assert(Args.size() == 0 &&"too many arguments for MDString");
283 }
284
285if (!S)
286continue;
287
288// Check if the hint starts with the loop metadata prefix.
289StringRefName = S->getString();
290if (Args.size() == 1)
291 setHint(Name, Args[0]);
292 }
293}
294
295void LoopVectorizeHints::setHint(StringRefName,Metadata *Arg) {
296if (!Name.starts_with(Prefix()))
297return;
298Name =Name.substr(Prefix().size(),StringRef::npos);
299
300constConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
301if (!C)
302return;
303unsigned Val =C->getZExtValue();
304
305 Hint *Hints[] = {&Width, &Interleave, &Force,
306 &IsVectorized, &Predicate, &Scalable};
307for (auto *H : Hints) {
308if (Name ==H->Name) {
309if (H->validate(Val))
310H->Value = Val;
311else
312LLVM_DEBUG(dbgs() <<"LV: ignoring invalid hint '" <<Name <<"'\n");
313break;
314 }
315 }
316}
317
318// Return true if the inner loop \p Lp is uniform with regard to the outer loop
319// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
320// executing the inner loop will execute the same iterations). This check is
321// very constrained for now but it will be relaxed in the future. \p Lp is
322// considered uniform if it meets all the following conditions:
323// 1) it has a canonical IV (starting from 0 and with stride 1),
324// 2) its latch terminator is a conditional branch and,
325// 3) its latch condition is a compare instruction whose operands are the
326// canonical IV and an OuterLp invariant.
327// This check doesn't take into account the uniformity of other conditions not
328// related to the loop latch because they don't affect the loop uniformity.
329//
330// NOTE: We decided to keep all these checks and its associated documentation
331// together so that we can easily have a picture of the current supported loop
332// nests. However, some of the current checks don't depend on \p OuterLp and
333// would be redundantly executed for each \p Lp if we invoked this function for
334// different candidate outer loops. This is not the case for now because we
335// don't currently have the infrastructure to evaluate multiple candidate outer
336// loops and \p OuterLp will be a fixed parameter while we only support explicit
337// outer loop vectorization. It's also very likely that these checks go away
338// before introducing the aforementioned infrastructure. However, if this is not
339// the case, we should move the \p OuterLp independent checks to a separate
340// function that is only executed once for each \p Lp.
341staticboolisUniformLoop(Loop *Lp,Loop *OuterLp) {
342assert(Lp->getLoopLatch() &&"Expected loop with a single latch.");
343
344// If Lp is the outer loop, it's uniform by definition.
345if (Lp == OuterLp)
346returntrue;
347assert(OuterLp->contains(Lp) &&"OuterLp must contain Lp.");
348
349// 1.
350PHINode *IV = Lp->getCanonicalInductionVariable();
351if (!IV) {
352LLVM_DEBUG(dbgs() <<"LV: Canonical IV not found.\n");
353returnfalse;
354 }
355
356// 2.
357BasicBlock *Latch = Lp->getLoopLatch();
358auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
359if (!LatchBr || LatchBr->isUnconditional()) {
360LLVM_DEBUG(dbgs() <<"LV: Unsupported loop latch branch.\n");
361returnfalse;
362 }
363
364// 3.
365auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
366if (!LatchCmp) {
367LLVM_DEBUG(
368dbgs() <<"LV: Loop latch condition is not a compare instruction.\n");
369returnfalse;
370 }
371
372Value *CondOp0 = LatchCmp->getOperand(0);
373Value *CondOp1 = LatchCmp->getOperand(1);
374Value *IVUpdate =IV->getIncomingValueForBlock(Latch);
375if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
376 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
377LLVM_DEBUG(dbgs() <<"LV: Loop latch condition is not uniform.\n");
378returnfalse;
379 }
380
381returntrue;
382}
383
384// Return true if \p Lp and all its nested loops are uniform with regard to \p
385// OuterLp.
386staticboolisUniformLoopNest(Loop *Lp,Loop *OuterLp) {
387if (!isUniformLoop(Lp, OuterLp))
388returnfalse;
389
390// Check if nested loops are uniform.
391for (Loop *SubLp : *Lp)
392if (!isUniformLoopNest(SubLp, OuterLp))
393returnfalse;
394
395returntrue;
396}
397
398staticType *convertPointerToIntegerType(constDataLayout &DL,Type *Ty) {
399if (Ty->isPointerTy())
400returnDL.getIntPtrType(Ty);
401
402// It is possible that char's or short's overflow when we ask for the loop's
403// trip count, work around this by changing the type size.
404if (Ty->getScalarSizeInBits() < 32)
405returnType::getInt32Ty(Ty->getContext());
406
407return Ty;
408}
409
410staticType *getWiderType(constDataLayout &DL,Type *Ty0,Type *Ty1) {
411 Ty0 =convertPointerToIntegerType(DL, Ty0);
412 Ty1 =convertPointerToIntegerType(DL, Ty1);
413if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
414return Ty0;
415return Ty1;
416}
417
418/// Check that the instruction has outside loop users and is not an
419/// identified reduction variable.
420staticboolhasOutsideLoopUser(constLoop *TheLoop,Instruction *Inst,
421SmallPtrSetImpl<Value *> &AllowedExit) {
422// Reductions, Inductions and non-header phis are allowed to have exit users. All
423// other instructions must not have external users.
424if (!AllowedExit.count(Inst))
425// Check that all of the users of the loop are inside the BB.
426for (User *U : Inst->users()) {
427Instruction *UI = cast<Instruction>(U);
428// This user may be a reduction exit value.
429if (!TheLoop->contains(UI)) {
430LLVM_DEBUG(dbgs() <<"LV: Found an outside user for : " << *UI <<'\n');
431returntrue;
432 }
433 }
434returnfalse;
435}
436
437/// Returns true if A and B have same pointer operands or same SCEVs addresses
438staticboolstoreToSameAddress(ScalarEvolution *SE,StoreInst *A,
439StoreInst *B) {
440// Compare store
441if (A ==B)
442returntrue;
443
444// Otherwise Compare pointers
445Value *APtr =A->getPointerOperand();
446Value *BPtr =B->getPointerOperand();
447if (APtr == BPtr)
448returntrue;
449
450// Otherwise compare address SCEVs
451return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
452}
453
454intLoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
455Value *Ptr) const{
456// FIXME: Currently, the set of symbolic strides is sometimes queried before
457// it's collected. This happens from canVectorizeWithIfConvert, when the
458// pointer is checked to reference consecutive elements suitable for a
459// masked access.
460constauto &Strides =
461 LAI ? LAI->getSymbolicStrides() :DenseMap<Value *, const SCEV *>();
462
463bool CanAddPredicate = !llvm::shouldOptimizeForSize(
464 TheLoop->getHeader(), PSI, BFI,PGSOQueryType::IRPass);
465int Stride =getPtrStride(PSE, AccessTy,Ptr, TheLoop, Strides,
466 CanAddPredicate,false).value_or(0);
467if (Stride == 1 || Stride == -1)
468return Stride;
469return 0;
470}
471
472boolLoopVectorizationLegality::isInvariant(Value *V) const{
473return LAI->isInvariant(V);
474}
475
476namespace{
477/// A rewriter to build the SCEVs for each of the VF lanes in the expected
478/// vectorized loop, which can then be compared to detect their uniformity. This
479/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
480/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
481/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
482/// uniformity.
483classSCEVAddRecForUniformityRewriter
484 :publicSCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
485 /// Multiplier to be applied to the step of AddRecs in TheLoop.
486unsigned StepMultiplier;
487
488 /// Offset to be added to the AddRecs in TheLoop.
489unsigned Offset;
490
491 /// Loop for which to rewrite AddRecsFor.
492Loop *TheLoop;
493
494 /// Is any sub-expressions not analyzable w.r.t. uniformity?
495bool CannotAnalyze =false;
496
497bool canAnalyze() const{return !CannotAnalyze; }
498
499public:
500 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE,unsigned StepMultiplier,
501unsigned Offset,Loop *TheLoop)
502 :SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
503 TheLoop(TheLoop) {}
504
505constSCEV *visitAddRecExpr(constSCEVAddRecExpr *Expr) {
506assert(Expr->getLoop() == TheLoop &&
507"addrec outside of TheLoop must be invariant and should have been "
508"handled earlier");
509// Build a new AddRec by multiplying the step by StepMultiplier and
510// incrementing the start by Offset * step.
511Type *Ty = Expr->getType();
512constSCEV *Step = Expr->getStepRecurrence(SE);
513if (!SE.isLoopInvariant(Step, TheLoop)) {
514 CannotAnalyze =true;
515return Expr;
516 }
517constSCEV *NewStep =
518 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
519constSCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
520constSCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
521return SE.getAddRecExpr(NewStart, NewStep, TheLoop,SCEV::FlagAnyWrap);
522 }
523
524constSCEV *visit(constSCEV *S) {
525if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
526return S;
527returnSCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
528 }
529
530constSCEV *visitUnknown(constSCEVUnknown *S) {
531if (SE.isLoopInvariant(S, TheLoop))
532return S;
533// The value could vary across iterations.
534 CannotAnalyze =true;
535return S;
536 }
537
538constSCEV *visitCouldNotCompute(constSCEVCouldNotCompute *S) {
539// Could not analyze the expression.
540 CannotAnalyze =true;
541return S;
542 }
543
544staticconstSCEV *rewrite(constSCEV *S,ScalarEvolution &SE,
545unsigned StepMultiplier,unsigned Offset,
546Loop *TheLoop) {
547 /// Bail out if the expression does not contain an UDiv expression.
548 /// Uniform values which are not loop invariant require operations to strip
549 /// out the lowest bits. For now just look for UDivs and use it to avoid
550 /// re-writing UDIV-free expressions for other lanes to limit compile time.
551if (!SCEVExprContains(S,
552 [](constSCEV *S) {return isa<SCEVUDivExpr>(S); }))
553return SE.getCouldNotCompute();
554
555 SCEVAddRecForUniformityRewriterRewriter(SE, StepMultiplier, Offset,
556 TheLoop);
557constSCEV *Result =Rewriter.visit(S);
558
559if (Rewriter.canAnalyze())
560returnResult;
561return SE.getCouldNotCompute();
562 }
563};
564
565}// namespace
566
567boolLoopVectorizationLegality::isUniform(Value *V,ElementCount VF) const{
568if (isInvariant(V))
569returntrue;
570if (VF.isScalable())
571returnfalse;
572if (VF.isScalar())
573returntrue;
574
575// Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
576// never considered uniform.
577auto *SE = PSE.getSE();
578if (!SE->isSCEVable(V->getType()))
579returnfalse;
580constSCEV *S = SE->getSCEV(V);
581
582// Rewrite AddRecs in TheLoop to step by VF and check if the expression for
583// lane 0 matches the expressions for all other lanes.
584unsigned FixedVF = VF.getKnownMinValue();
585constSCEV *FirstLaneExpr =
586 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
587if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
588returnfalse;
589
590// Make sure the expressions for lanes FixedVF-1..1 match the expression for
591// lane 0. We check lanes in reverse order for compile-time, as frequently
592// checking the last lane is sufficient to rule out uniformity.
593returnall_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsignedI) {
594constSCEV *IthLaneExpr =
595 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF,I, TheLoop);
596return FirstLaneExpr == IthLaneExpr;
597 });
598}
599
600boolLoopVectorizationLegality::isUniformMemOp(Instruction &I,
601ElementCount VF) const{
602Value *Ptr =getLoadStorePointerOperand(&I);
603if (!Ptr)
604returnfalse;
605// Note: There's nothing inherent which prevents predicated loads and
606// stores from being uniform. The current lowering simply doesn't handle
607// it; in particular, the cost model distinguishes scatter/gather from
608// scalar w/predication, and we currently rely on the scalar path.
609returnisUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
610}
611
612bool LoopVectorizationLegality::canVectorizeOuterLoop() {
613assert(!TheLoop->isInnermost() &&"We are not vectorizing an outer loop.");
614// Store the result and return it at the end instead of exiting early, in case
615// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
616bool Result =true;
617bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
618
619for (BasicBlock *BB : TheLoop->blocks()) {
620// Check whether the BB terminator is a BranchInst. Any other terminator is
621// not supported yet.
622auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
623if (!Br) {
624reportVectorizationFailure("Unsupported basic block terminator",
625"loop control flow is not understood by vectorizer",
626"CFGNotUnderstood", ORE, TheLoop);
627if (DoExtraAnalysis)
628 Result =false;
629else
630returnfalse;
631 }
632
633// Check whether the BranchInst is a supported one. Only unconditional
634// branches, conditional branches with an outer loop invariant condition or
635// backedges are supported.
636// FIXME: We skip these checks when VPlan predication is enabled as we
637// want to allow divergent branches. This whole check will be removed
638// once VPlan predication is on by default.
639if (Br && Br->isConditional() &&
640 !TheLoop->isLoopInvariant(Br->getCondition()) &&
641 !LI->isLoopHeader(Br->getSuccessor(0)) &&
642 !LI->isLoopHeader(Br->getSuccessor(1))) {
643reportVectorizationFailure("Unsupported conditional branch",
644"loop control flow is not understood by vectorizer",
645"CFGNotUnderstood", ORE, TheLoop);
646if (DoExtraAnalysis)
647 Result =false;
648else
649returnfalse;
650 }
651 }
652
653// Check whether inner loops are uniform. At this point, we only support
654// simple outer loops scenarios with uniform nested loops.
655if (!isUniformLoopNest(TheLoop/*loop nest*/,
656 TheLoop/*context outer loop*/)) {
657reportVectorizationFailure("Outer loop contains divergent loops",
658"loop control flow is not understood by vectorizer",
659"CFGNotUnderstood", ORE, TheLoop);
660if (DoExtraAnalysis)
661Result =false;
662else
663returnfalse;
664 }
665
666// Check whether we are able to set up outer loop induction.
667if (!setupOuterLoopInductions()) {
668reportVectorizationFailure("Unsupported outer loop Phi(s)",
669"UnsupportedPhi", ORE, TheLoop);
670if (DoExtraAnalysis)
671Result =false;
672else
673returnfalse;
674 }
675
676returnResult;
677}
678
679void LoopVectorizationLegality::addInductionPhi(
680PHINode *Phi,constInductionDescriptor &ID,
681SmallPtrSetImpl<Value *> &AllowedExit) {
682 Inductions[Phi] =ID;
683
684// In case this induction also comes with casts that we know we can ignore
685// in the vectorized loop body, record them here. All casts could be recorded
686// here for ignoring, but suffices to record only the first (as it is the
687// only one that may bw used outside the cast sequence).
688constSmallVectorImpl<Instruction *> &Casts =ID.getCastInsts();
689if (!Casts.empty())
690 InductionCastsToIgnore.insert(*Casts.begin());
691
692Type *PhiTy =Phi->getType();
693constDataLayout &DL =Phi->getDataLayout();
694
695// Get the widest type.
696if (!PhiTy->isFloatingPointTy()) {
697if (!WidestIndTy)
698 WidestIndTy =convertPointerToIntegerType(DL, PhiTy);
699else
700 WidestIndTy =getWiderType(DL, PhiTy, WidestIndTy);
701 }
702
703// Int inductions are special because we only allow one IV.
704if (ID.getKind() ==InductionDescriptor::IK_IntInduction &&
705ID.getConstIntStepValue() &&ID.getConstIntStepValue()->isOne() &&
706 isa<Constant>(ID.getStartValue()) &&
707 cast<Constant>(ID.getStartValue())->isNullValue()) {
708
709// Use the phi node with the widest type as induction. Use the last
710// one if there are multiple (no good reason for doing this other
711// than it is expedient). We've checked that it begins at zero and
712// steps by one, so this is a canonical induction variable.
713if (!PrimaryInduction || PhiTy == WidestIndTy)
714 PrimaryInduction =Phi;
715 }
716
717// Both the PHI node itself, and the "post-increment" value feeding
718// back into the PHI node may have external users.
719// We can allow those uses, except if the SCEVs we have for them rely
720// on predicates that only hold within the loop, since allowing the exit
721// currently means re-using this SCEV outside the loop (see PR33706 for more
722// details).
723if (PSE.getPredicate().isAlwaysTrue()) {
724 AllowedExit.insert(Phi);
725 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
726 }
727
728LLVM_DEBUG(dbgs() <<"LV: Found an induction variable.\n");
729}
730
731bool LoopVectorizationLegality::setupOuterLoopInductions() {
732BasicBlock *Header = TheLoop->getHeader();
733
734// Returns true if a given Phi is a supported induction.
735auto IsSupportedPhi = [&](PHINode &Phi) ->bool {
736InductionDescriptorID;
737if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE,ID) &&
738ID.getKind() ==InductionDescriptor::IK_IntInduction) {
739 addInductionPhi(&Phi,ID, AllowedExit);
740returntrue;
741 }
742// Bail out for any Phi in the outer loop header that is not a supported
743// induction.
744LLVM_DEBUG(
745dbgs() <<"LV: Found unsupported PHI for outer loop vectorization.\n");
746returnfalse;
747 };
748
749returnllvm::all_of(Header->phis(), IsSupportedPhi);
750}
751
752/// Checks if a function is scalarizable according to the TLI, in
753/// the sense that it should be vectorized and then expanded in
754/// multiple scalar calls. This is represented in the
755/// TLI via mappings that do not specify a vector name, as in the
756/// following example:
757///
758/// const VecDesc VecIntrinsics[] = {
759/// {"llvm.phx.abs.i32", "", 4}
760/// };
761staticboolisTLIScalarize(constTargetLibraryInfo &TLI,constCallInst &CI) {
762constStringRef ScalarName = CI.getCalledFunction()->getName();
763bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
764// Check that all known VFs are not associated to a vector
765// function, i.e. the vector name is emty.
766if (Scalarize) {
767ElementCount WidestFixedVF, WidestScalableVF;
768 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
769for (ElementCount VF =ElementCount::getFixed(2);
770ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
771 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
772for (ElementCount VF =ElementCount::getScalable(1);
773ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
774 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
775assert((WidestScalableVF.isZero() || !Scalarize) &&
776"Caller may decide to scalarize a variant using a scalable VF");
777 }
778return Scalarize;
779}
780
781/// Returns true if the call return type `Ty` can be widened by the loop
782/// vectorizer.
783staticboolcanWidenCallReturnType(Type *Ty) {
784auto *StructTy = dyn_cast<StructType>(Ty);
785// TODO: Remove the homogeneous types restriction. This is just an initial
786// simplification. When we want to support things like the overflow intrinsics
787// we will have to lift this restriction.
788if (StructTy && !StructTy->containsHomogeneousTypes())
789returnfalse;
790returncanVectorizeTy(StructTy);
791}
792
793bool LoopVectorizationLegality::canVectorizeInstrs() {
794BasicBlock *Header = TheLoop->getHeader();
795
796// For each block in the loop.
797for (BasicBlock *BB : TheLoop->blocks()) {
798// Scan the instructions in the block and look for hazards.
799for (Instruction &I : *BB) {
800if (auto *Phi = dyn_cast<PHINode>(&I)) {
801Type *PhiTy = Phi->getType();
802// Check that this PHI type is allowed.
803if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
804 !PhiTy->isPointerTy()) {
805reportVectorizationFailure("Found a non-int non-pointer PHI",
806"loop control flow is not understood by vectorizer",
807"CFGNotUnderstood", ORE, TheLoop);
808returnfalse;
809 }
810
811// If this PHINode is not in the header block, then we know that we
812// can convert it to select during if-conversion. No need to check if
813// the PHIs in this block are induction or reduction variables.
814if (BB != Header) {
815// Non-header phi nodes that have outside uses can be vectorized. Add
816// them to the list of allowed exits.
817// Unsafe cyclic dependencies with header phis are identified during
818// legalization for reduction, induction and fixed order
819// recurrences.
820 AllowedExit.insert(&I);
821continue;
822 }
823
824// We only allow if-converted PHIs with exactly two incoming values.
825if (Phi->getNumIncomingValues() != 2) {
826reportVectorizationFailure("Found an invalid PHI",
827"loop control flow is not understood by vectorizer",
828"CFGNotUnderstood", ORE, TheLoop, Phi);
829returnfalse;
830 }
831
832RecurrenceDescriptor RedDes;
833if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
834 DT, PSE.getSE())) {
835 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
836 AllowedExit.insert(RedDes.getLoopExitInstr());
837 Reductions[Phi] = RedDes;
838continue;
839 }
840
841// We prevent matching non-constant strided pointer IVS to preserve
842// historical vectorizer behavior after a generalization of the
843// IVDescriptor code. The intent is to remove this check, but we
844// have to fix issues around code quality for such loops first.
845auto IsDisallowedStridedPointerInduction =
846 [](constInductionDescriptor &ID) {
847if (AllowStridedPointerIVs)
848returnfalse;
849returnID.getKind() ==InductionDescriptor::IK_PtrInduction &&
850ID.getConstIntStepValue() ==nullptr;
851 };
852
853// TODO: Instead of recording the AllowedExit, it would be good to
854// record the complementary set: NotAllowedExit. These include (but may
855// not be limited to):
856// 1. Reduction phis as they represent the one-before-last value, which
857// is not available when vectorized
858// 2. Induction phis and increment when SCEV predicates cannot be used
859// outside the loop - see addInductionPhi
860// 3. Non-Phis with outside uses when SCEV predicates cannot be used
861// outside the loop - see call to hasOutsideLoopUser in the non-phi
862// handling below
863// 4. FixedOrderRecurrence phis that can possibly be handled by
864// extraction.
865// By recording these, we can then reason about ways to vectorize each
866// of these NotAllowedExit.
867InductionDescriptorID;
868if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE,ID) &&
869 !IsDisallowedStridedPointerInduction(ID)) {
870 addInductionPhi(Phi,ID, AllowedExit);
871 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
872continue;
873 }
874
875if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
876 AllowedExit.insert(Phi);
877 FixedOrderRecurrences.insert(Phi);
878continue;
879 }
880
881// As a last resort, coerce the PHI to a AddRec expression
882// and re-try classifying it a an induction PHI.
883if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE,ID,true) &&
884 !IsDisallowedStridedPointerInduction(ID)) {
885 addInductionPhi(Phi,ID, AllowedExit);
886continue;
887 }
888
889reportVectorizationFailure("Found an unidentified PHI",
890"value that could not be identified as "
891"reduction is used outside the loop",
892"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
893returnfalse;
894 }// end of PHI handling
895
896// We handle calls that:
897// * Are debug info intrinsics.
898// * Have a mapping to an IR intrinsic.
899// * Have a vector version available.
900auto *CI = dyn_cast<CallInst>(&I);
901
902if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
903 !isa<DbgInfoIntrinsic>(CI) &&
904 !(CI->getCalledFunction() && TLI &&
905 (!VFDatabase::getMappings(*CI).empty() ||
906isTLIScalarize(*TLI, *CI)))) {
907// If the call is a recognized math libary call, it is likely that
908// we can vectorize it given loosened floating-point constraints.
909LibFuncFunc;
910bool IsMathLibCall =
911 TLI && CI->getCalledFunction() &&
912 CI->getType()->isFloatingPointTy() &&
913 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
914 TLI->hasOptimizedCodeGen(Func);
915
916if (IsMathLibCall) {
917// TODO: Ideally, we should not use clang-specific language here,
918// but it's hard to provide meaningful yet generic advice.
919// Also, should this be guarded by allowExtraAnalysis() and/or be part
920// of the returned info from isFunctionVectorizable()?
921reportVectorizationFailure(
922"Found a non-intrinsic callsite",
923"library call cannot be vectorized. "
924"Try compiling with -fno-math-errno, -ffast-math, "
925"or similar flags",
926"CantVectorizeLibcall", ORE, TheLoop, CI);
927 }else {
928reportVectorizationFailure("Found a non-intrinsic callsite",
929"call instruction cannot be vectorized",
930"CantVectorizeLibcall", ORE, TheLoop, CI);
931 }
932returnfalse;
933 }
934
935// Some intrinsics have scalar arguments and should be same in order for
936// them to be vectorized (i.e. loop invariant).
937if (CI) {
938auto *SE = PSE.getSE();
939Intrinsic::ID IntrinID =getVectorIntrinsicIDForCall(CI, TLI);
940for (unsignedIdx = 0;Idx < CI->arg_size(); ++Idx)
941if (isVectorIntrinsicWithScalarOpAtArg(IntrinID,Idx,TTI)) {
942if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)),
943 TheLoop)) {
944reportVectorizationFailure("Found unvectorizable intrinsic",
945"intrinsic instruction cannot be vectorized",
946"CantVectorizeIntrinsic", ORE, TheLoop, CI);
947returnfalse;
948 }
949 }
950 }
951
952// If we found a vectorized variant of a function, note that so LV can
953// make better decisions about maximum VF.
954if (CI && !VFDatabase::getMappings(*CI).empty())
955 VecCallVariantsFound =true;
956
957auto CanWidenInstructionTy = [this](Instructionconst &Inst) {
958Type *InstTy = Inst.getType();
959if (!isa<StructType>(InstTy))
960returncanVectorizeTy(InstTy);
961
962// For now, we only recognize struct values returned from calls where
963// all users are extractvalue as vectorizable. All element types of the
964// struct must be types that can be widened.
965if (isa<CallInst>(Inst) &&canWidenCallReturnType(InstTy) &&
966all_of(Inst.users(), IsaPred<ExtractValueInst>)) {
967// TODO: Remove the `StructVecCallFound` flag once vectorizing calls
968// with struct returns is supported.
969 StructVecCallFound =true;
970returntrue;
971 }
972
973returnfalse;
974 };
975
976// Check that the instruction return type is vectorizable.
977// We can't vectorize casts from vector type to scalar type.
978// Also, we can't vectorize extractelement instructions.
979if (!CanWidenInstructionTy(I) ||
980 (isa<CastInst>(I) &&
981 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
982 isa<ExtractElementInst>(I)) {
983reportVectorizationFailure("Found unvectorizable type",
984"instruction return type cannot be vectorized",
985"CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
986returnfalse;
987 }
988
989// Check that the stored type is vectorizable.
990if (auto *ST = dyn_cast<StoreInst>(&I)) {
991Type *T =ST->getValueOperand()->getType();
992if (!VectorType::isValidElementType(T)) {
993reportVectorizationFailure("Store instruction cannot be vectorized",
994"CantVectorizeStore", ORE, TheLoop, ST);
995returnfalse;
996 }
997
998// For nontemporal stores, check that a nontemporal vector version is
999// supported on the target.
1000if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1001// Arbitrarily try a vector of 2 elements.
1002auto *VecTy =FixedVectorType::get(T,/*NumElts=*/2);
1003assert(VecTy &&"did not find vectorized version of stored type");
1004if (!TTI->isLegalNTStore(VecTy,ST->getAlign())) {
1005reportVectorizationFailure(
1006"nontemporal store instruction cannot be vectorized",
1007"CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1008returnfalse;
1009 }
1010 }
1011
1012 }elseif (auto *LD = dyn_cast<LoadInst>(&I)) {
1013if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1014// For nontemporal loads, check that a nontemporal vector version is
1015// supported on the target (arbitrarily try a vector of 2 elements).
1016auto *VecTy =FixedVectorType::get(I.getType(),/*NumElts=*/2);
1017assert(VecTy &&"did not find vectorized version of load type");
1018if (!TTI->isLegalNTLoad(VecTy,LD->getAlign())) {
1019reportVectorizationFailure(
1020"nontemporal load instruction cannot be vectorized",
1021"CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1022returnfalse;
1023 }
1024 }
1025
1026// FP instructions can allow unsafe algebra, thus vectorizable by
1027// non-IEEE-754 compliant SIMD units.
1028// This applies to floating-point math operations and calls, not memory
1029// operations, shuffles, or casts, as they don't change precision or
1030// semantics.
1031 }elseif (I.getType()->isFloatingPointTy() && (CI ||I.isBinaryOp()) &&
1032 !I.isFast()) {
1033LLVM_DEBUG(dbgs() <<"LV: Found FP op with unsafe algebra.\n");
1034 Hints->setPotentiallyUnsafe();
1035 }
1036
1037// Reduction instructions are allowed to have exit users.
1038// All other instructions must not have external users.
1039if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1040// We can safely vectorize loops where instructions within the loop are
1041// used outside the loop only if the SCEV predicates within the loop is
1042// same as outside the loop. Allowing the exit means reusing the SCEV
1043// outside the loop.
1044if (PSE.getPredicate().isAlwaysTrue()) {
1045 AllowedExit.insert(&I);
1046continue;
1047 }
1048reportVectorizationFailure("Value cannot be used outside the loop",
1049"ValueUsedOutsideLoop", ORE, TheLoop, &I);
1050returnfalse;
1051 }
1052 }// next instr.
1053 }
1054
1055if (!PrimaryInduction) {
1056if (Inductions.empty()) {
1057reportVectorizationFailure("Did not find one integer induction var",
1058"loop induction variable could not be identified",
1059"NoInductionVariable", ORE, TheLoop);
1060returnfalse;
1061 }
1062if (!WidestIndTy) {
1063reportVectorizationFailure("Did not find one integer induction var",
1064"integer loop induction variable could not be identified",
1065"NoIntegerInductionVariable", ORE, TheLoop);
1066returnfalse;
1067 }
1068LLVM_DEBUG(dbgs() <<"LV: Did not find one integer induction var.\n");
1069 }
1070
1071// Now we know the widest induction type, check if our found induction
1072// is the same size. If it's not, unset it here and InnerLoopVectorizer
1073// will create another.
1074if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
1075 PrimaryInduction =nullptr;
1076
1077returntrue;
1078}
1079
/// Find histogram operations that match high-level code in loops:
/// \code
/// buckets[indices[i]]+=step;
/// \endcode
///
/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
/// array the computed histogram. It uses a BinOp to sum all counts, storing
/// them using a loop-variant index Load from the 'indices' input array.
///
/// On a successful match it appends the Load/BinOp/Store triple describing the
/// histogram to \p Histograms and returns true; otherwise it returns false and
/// leaves \p Histograms untouched.
///
/// NOTE(review): \p LI (the dependence source found by the caller) is not
/// referenced in the body; the indexed load is rediscovered from the BinOp
/// instead -- confirm the parameter is intentionally unused.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
                          const PredicatedScalarEvolution &PSE,
                          SmallVectorImpl<HistogramInfo> &Histograms) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return false;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return false;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return false;

  // The address to store is calculated through a GEP Instruction.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!GEP)
    return false;

  // Restrict address calculation to constant indices except for the last term.
  Value *HIdx = nullptr;
  for (Value *Index : GEP->indices()) {
    // A second non-constant index means the address shape is unsupported.
    if (HIdx)
      return false;
    if (!isa<ConstantInt>(Index))
      HIdx = Index;
  }

  if (!HIdx)
    return false;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  //        We're currently trying to match an operation looping over an array
  //        of indices, but there could be additional levels of indirection
  //        in place, or possibly some additional calculation to form the index
  //        from the loaded data.
  Value *VPtrVal;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
    return false;

  // Make sure the index address varies in this loop, not an outer loop.
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
  if (!AR || AR->getLoop() != TheLoop)
    return false;

  // Ensure we'll have the same mask by checking that all parts of the histogram
  // (gather load, update, scatter store) are in the same block.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  return true;
}
1162
1163bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1164// For now, we only support an IndirectUnsafe dependency that calculates
1165// a histogram
1166if (!EnableHistogramVectorization)
1167returnfalse;
1168
1169// Find a single IndirectUnsafe dependency.
1170constMemoryDepChecker::Dependence *IUDep =nullptr;
1171constMemoryDepChecker &DepChecker = LAI->getDepChecker();
1172constauto *Deps = DepChecker.getDependences();
1173// If there were too many dependences, LAA abandons recording them. We can't
1174// proceed safely if we don't know what the dependences are.
1175if (!Deps)
1176returnfalse;
1177
1178for (constMemoryDepChecker::Dependence &Dep : *Deps) {
1179// Ignore dependencies that are either known to be safe or can be
1180// checked at runtime.
1181if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
1182MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
1183continue;
1184
1185// We're only interested in IndirectUnsafe dependencies here, where the
1186// address might come from a load from memory. We also only want to handle
1187// one such dependency, at least for now.
1188if (Dep.Type !=MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1189returnfalse;
1190
1191 IUDep = &Dep;
1192 }
1193if (!IUDep)
1194returnfalse;
1195
1196// For now only normal loads and stores are supported.
1197LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1198StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1199
1200if (!LI || !SI)
1201returnfalse;
1202
1203LLVM_DEBUG(dbgs() <<"LV: Checking for a histogram on: " << *SI <<"\n");
1204returnfindHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1205}
1206
// Query LoopAccessAnalysis for this loop and translate its verdict into a
// legality decision, carrying over any SCEV predicates LAA required. Returns
// true if all memory accesses in the loop can be vectorized (possibly with
// runtime checks).
bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  // Forward any analysis report produced by LAA to the remark emitter so the
  // user can see why memory analysis failed or what checks were needed.
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  // If LAA rejected the loop, the only remaining possibility is a
  // recognizable histogram-style indirect dependence.
  if (!LAI->canVectorizeMemory())
    return canVectorizeIndirectUnsafeDependences();

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to invariant address when final reduction value is
  // guaranteed to be stored at the end of the loop. Also, if decision to
  // vectorize loop is made, runtime checks are added so as to make sure that
  // invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if last stored value is unconditional
    // and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      // A conditional store of the final reduction value cannot be sunk to
      // the exit block, so it blocks vectorization.
      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not, we do not want
      // to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is already
      // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively deadcode.
          // With opaque pointers it is possible for one pointer to be used with
          // different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that types of stored
          // values are same.
          // TODO: Check that bitwidth of unhandled store is smaller than the
          // one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  // Carry over the predicates (e.g. runtime pointer checks) that LAA needed,
  // so they are emitted if the loop is vectorized.
  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}
1310
1311boolLoopVectorizationLegality::canVectorizeFPMath(
1312bool EnableStrictReductions) {
1313
1314// First check if there is any ExactFP math or if we allow reassociations
1315if (!Requirements->getExactFPInst() || Hints->allowReordering())
1316returntrue;
1317
1318// If the above is false, we have ExactFPMath & do not allow reordering.
1319// If the EnableStrictReductions flag is set, first check if we have any
1320// Exact FP induction vars, which we cannot vectorize.
1321if (!EnableStrictReductions ||
1322any_of(getInductionVars(), [&](auto &Induction) ->bool {
1323InductionDescriptor IndDesc = Induction.second;
1324return IndDesc.getExactFPMathInst();
1325 }))
1326returnfalse;
1327
1328// We can now only vectorize if all reductions with Exact FP math also
1329// have the isOrdered flag set, which indicates that we can move the
1330// reduction operations in-loop.
1331return (all_of(getReductionVars(), [&](auto &Reduction) ->bool {
1332constRecurrenceDescriptor &RdxDesc =Reduction.second;
1333return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1334 }));
1335}
1336
1337boolLoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
1338returnany_of(getReductionVars(), [&](auto &Reduction) ->bool {
1339constRecurrenceDescriptor &RdxDesc =Reduction.second;
1340return RdxDesc.IntermediateStore == SI;
1341 });
1342}
1343
1344boolLoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
1345returnany_of(getReductionVars(), [&](auto &Reduction) ->bool {
1346constRecurrenceDescriptor &RdxDesc =Reduction.second;
1347if (!RdxDesc.IntermediateStore)
1348returnfalse;
1349
1350ScalarEvolution *SE = PSE.getSE();
1351Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1352return V == InvariantAddress ||
1353 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1354 });
1355}
1356
1357boolLoopVectorizationLegality::isInductionPhi(constValue *V) const{
1358Value *In0 =const_cast<Value *>(V);
1359PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1360if (!PN)
1361returnfalse;
1362
1363return Inductions.count(PN);
1364}
1365
1366constInductionDescriptor *
1367LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const{
1368if (!isInductionPhi(Phi))
1369returnnullptr;
1370auto &ID =getInductionVars().find(Phi)->second;
1371if (ID.getKind() ==InductionDescriptor::IK_IntInduction ||
1372ID.getKind() ==InductionDescriptor::IK_FpInduction)
1373return &ID;
1374returnnullptr;
1375}
1376
1377constInductionDescriptor *
1378LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const{
1379if (!isInductionPhi(Phi))
1380returnnullptr;
1381auto &ID =getInductionVars().find(Phi)->second;
1382if (ID.getKind() ==InductionDescriptor::IK_PtrInduction)
1383return &ID;
1384returnnullptr;
1385}
1386
1387boolLoopVectorizationLegality::isCastedInductionVariable(
1388constValue *V) const{
1389auto *Inst = dyn_cast<Instruction>(V);
1390return (Inst && InductionCastsToIgnore.count(Inst));
1391}
1392
1393boolLoopVectorizationLegality::isInductionVariable(constValue *V) const{
1394returnisInductionPhi(V) ||isCastedInductionVariable(V);
1395}
1396
1397boolLoopVectorizationLegality::isFixedOrderRecurrence(
1398constPHINode *Phi) const{
1399return FixedOrderRecurrences.count(Phi);
1400}
1401
1402boolLoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const{
1403// When vectorizing early exits, create predicates for the latch block only.
1404// The early exiting block must be a direct predecessor of the latch at the
1405// moment.
1406BasicBlock *Latch = TheLoop->getLoopLatch();
1407if (hasUncountableEarlyExit()) {
1408assert(
1409is_contained(predecessors(Latch),getUncountableEarlyExitingBlock()) &&
1410"Uncountable exiting block must be a direct predecessor of latch");
1411return BB == Latch;
1412 }
1413returnLoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1414}
1415
// Return true if every instruction in \p BB can be masked, dropped, or left
// as-is when the block's control flow is flattened into predicated code.
// Instructions that will require a mask are recorded in \p MaskedOp; pointers
// in \p SafePtrs are known dereferenceable, so loads from them need no mask.
bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (VFDatabase::hasMaskedVariant(*CI)) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    // Loads from pointers in SafePtrs never fault and need no mask.
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated store requires some form of masking:
    // 1) masked store HW instruction,
    // 2) emulation via load-blend-store (only if safe and legal to do so,
    //    be aware on the race conditions), or
    // 3) element-by-element predicate check and scalar store.
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    // Any other instruction with memory effects or that may throw cannot be
    // safely executed speculatively under a mask.
    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}
1466
// Return true if all control flow inside this multi-block loop can be
// replaced by selects/masking (if-conversion). As a side effect, records the
// instructions needing a mask in the member set MaskedOp.
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "IfConversionDisabled", ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Accesses in a block executed on every iteration are trivially safe.
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, a address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    SmallVector<const SCEVPredicate *, 4> Predicates;
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
      // that it will consider loops that need guarding by SCEV checks. The
      // vectoriser will generate these checks if we decide to vectorise.
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
                                            &Predicates))
        SafePointers.insert(LI->getPointerOperand());
      // Predicates are re-collected per load; the vectorizer will rediscover
      // the needed ones itself if it decides to vectorize.
      Predicates.clear();
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We support only branches and switch statements as terminators inside the
    // loop.
    if (isa<SwitchInst>(BB->getTerminator())) {
      // Switches are only allowed when they do not exit the loop.
      if (TheLoop->isLoopExiting(BB)) {
        reportVectorizationFailure("Loop contains an unsupported switch",
                                   "LoopContainsUnsupportedSwitch", ORE,
                                   TheLoop, BB->getTerminator());
        return false;
      }
    } else if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains an unsupported terminator",
                                 "LoopContainsUnsupportedTerminator", ORE,
                                 TheLoop, BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}
1543
1544// Helper function to canVectorizeLoopNestCFG.
1545bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1546bool UseVPlanNativePath) {
1547assert((UseVPlanNativePath || Lp->isInnermost()) &&
1548"VPlan-native path is not enabled.");
1549
1550// TODO: ORE should be improved to show more accurate information when an
1551// outer loop can't be vectorized because a nested loop is not understood or
1552// legal. Something like: "outer_loop_location: loop not vectorized:
1553// (inner_loop_location) loop control flow is not understood by vectorizer".
1554
1555// Store the result and return it at the end instead of exiting early, in case
1556// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1557boolResult =true;
1558bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1559
1560// We must have a loop in canonical form. Loops with indirectbr in them cannot
1561// be canonicalized.
1562if (!Lp->getLoopPreheader()) {
1563reportVectorizationFailure("Loop doesn't have a legal pre-header",
1564"loop control flow is not understood by vectorizer",
1565"CFGNotUnderstood", ORE, TheLoop);
1566if (DoExtraAnalysis)
1567Result =false;
1568else
1569returnfalse;
1570 }
1571
1572// We must have a single backedge.
1573if (Lp->getNumBackEdges() != 1) {
1574reportVectorizationFailure("The loop must have a single backedge",
1575"loop control flow is not understood by vectorizer",
1576"CFGNotUnderstood", ORE, TheLoop);
1577if (DoExtraAnalysis)
1578Result =false;
1579else
1580returnfalse;
1581 }
1582
1583returnResult;
1584}
1585
1586bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1587Loop *Lp,bool UseVPlanNativePath) {
1588// Store the result and return it at the end instead of exiting early, in case
1589// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1590boolResult =true;
1591bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1592if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1593if (DoExtraAnalysis)
1594Result =false;
1595else
1596returnfalse;
1597 }
1598
1599// Recursively check whether the loop control flow of nested loops is
1600// understood.
1601for (Loop *SubLp : *Lp)
1602if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1603if (DoExtraAnalysis)
1604Result =false;
1605else
1606returnfalse;
1607 }
1608
1609returnResult;
1610}
1611
1612bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1613BasicBlock *LatchBB = TheLoop->getLoopLatch();
1614if (!LatchBB) {
1615reportVectorizationFailure("Loop does not have a latch",
1616"Cannot vectorize early exit loop",
1617"NoLatchEarlyExit", ORE, TheLoop);
1618returnfalse;
1619 }
1620
1621if (Reductions.size() || FixedOrderRecurrences.size()) {
1622reportVectorizationFailure(
1623"Found reductions or recurrences in early-exit loop",
1624"Cannot vectorize early exit loop with reductions or recurrences",
1625"RecurrencesInEarlyExitLoop", ORE, TheLoop);
1626returnfalse;
1627 }
1628
1629SmallVector<BasicBlock *, 8> ExitingBlocks;
1630 TheLoop->getExitingBlocks(ExitingBlocks);
1631
1632// Keep a record of all the exiting blocks.
1633SmallVector<const SCEVPredicate *, 4> Predicates;
1634 std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
1635for (BasicBlock *BB : ExitingBlocks) {
1636constSCEV *EC =
1637 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1638if (isa<SCEVCouldNotCompute>(EC)) {
1639SmallVector<BasicBlock *, 2> Succs(successors(BB));
1640if (Succs.size() != 2) {
1641reportVectorizationFailure(
1642"Early exiting block does not have exactly two successors",
1643"Incorrect number of successors from early exiting block",
1644"EarlyExitTooManySuccessors", ORE, TheLoop);
1645returnfalse;
1646 }
1647
1648BasicBlock *ExitBlock;
1649if (!TheLoop->contains(Succs[0]))
1650 ExitBlock = Succs[0];
1651else {
1652assert(!TheLoop->contains(Succs[1]));
1653 ExitBlock = Succs[1];
1654 }
1655
1656if (SingleUncountableEdge) {
1657reportVectorizationFailure(
1658"Loop has too many uncountable exits",
1659"Cannot vectorize early exit loop with more than one early exit",
1660"TooManyUncountableEarlyExits", ORE, TheLoop);
1661returnfalse;
1662 }
1663
1664 SingleUncountableEdge = {BB, ExitBlock};
1665 }else
1666 CountableExitingBlocks.push_back(BB);
1667 }
1668// We can safely ignore the predicates here because when vectorizing the loop
1669// the PredicatatedScalarEvolution class will keep track of all predicates
1670// for each exiting block anyway. This happens when calling
1671// PSE.getSymbolicMaxBackedgeTakenCount() below.
1672 Predicates.clear();
1673
1674if (!SingleUncountableEdge) {
1675LLVM_DEBUG(dbgs() <<"LV: Cound not find any uncountable exits");
1676returnfalse;
1677 }
1678
1679// The only supported early exit loops so far are ones where the early
1680// exiting block is a unique predecessor of the latch block.
1681BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1682if (LatchPredBB != SingleUncountableEdge->first) {
1683reportVectorizationFailure("Early exit is not the latch predecessor",
1684"Cannot vectorize early exit loop",
1685"EarlyExitNotLatchPredecessor", ORE, TheLoop);
1686returnfalse;
1687 }
1688
1689// The latch block must have a countable exit.
1690if (isa<SCEVCouldNotCompute>(
1691 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1692reportVectorizationFailure(
1693"Cannot determine exact exit count for latch block",
1694"Cannot vectorize early exit loop",
1695"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1696returnfalse;
1697 }
1698assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1699"Latch block not found in list of countable exits!");
1700
1701// Check to see if there are instructions that could potentially generate
1702// exceptions or have side-effects.
1703auto IsSafeOperation = [](Instruction *I) ->bool {
1704switch (I->getOpcode()) {
1705case Instruction::Load:
1706case Instruction::Store:
1707case Instruction::PHI:
1708case Instruction::Br:
1709// These are checked separately.
1710returntrue;
1711default:
1712returnisSafeToSpeculativelyExecute(I);
1713 }
1714 };
1715
1716for (auto *BB : TheLoop->blocks())
1717for (auto &I : *BB) {
1718if (I.mayWriteToMemory()) {
1719// We don't support writes to memory.
1720reportVectorizationFailure(
1721"Writes to memory unsupported in early exit loops",
1722"Cannot vectorize early exit loop with writes to memory",
1723"WritesInEarlyExitLoop", ORE, TheLoop);
1724returnfalse;
1725 }elseif (!IsSafeOperation(&I)) {
1726reportVectorizationFailure("Early exit loop contains operations that "
1727"cannot be speculatively executed",
1728"UnsafeOperationsEarlyExitLoop", ORE,
1729 TheLoop);
1730returnfalse;
1731 }
1732 }
1733
1734// The vectoriser cannot handle loads that occur after the early exit block.
1735assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first &&
1736"Expected latch predecessor to be the early exiting block");
1737
1738// TODO: Handle loops that may fault.
1739 Predicates.clear();
1740if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1741 &Predicates)) {
1742reportVectorizationFailure(
1743"Loop may fault",
1744"Cannot vectorize potentially faulting early exit loop",
1745"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1746returnfalse;
1747 }
1748
1749 [[maybe_unused]]constSCEV *SymbolicMaxBTC =
1750 PSE.getSymbolicMaxBackedgeTakenCount();
1751// Since we have an exact exit count for the latch and the early exit
1752// dominates the latch, then this should guarantee a computed SCEV value.
1753assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1754"Failed to get symbolic expression for backedge taken count");
1755LLVM_DEBUG(dbgs() <<"LV: Found an early exit loop with symbolic max "
1756"backedge taken count: "
1757 << *SymbolicMaxBTC <<'\n');
1758 UncountableEdge = SingleUncountableEdge;
1759returntrue;
1760}
1761
/// Top-level legality driver. Runs every legality check in sequence and
/// returns true iff the loop can be vectorized. When the remark emitter
/// allows extra analysis for this pass, a failing check records
/// Result = false and continues, so that *all* blocking reasons are
/// reported; otherwise the first failure returns immediately.
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in case
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
  bool Result = true;

  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Check whether the loop-related control flow in the loop nest is expected by
  // vectorizer.
  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
    if (DoExtraAnalysis) {
      LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
      Result = false;
    } else {
      return false;
    }
  }

  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
                    << '\n');

  // Specific checks for outer loops. We skip the remaining legal checks at this
  // point because they don't support outer loops.
  if (!TheLoop->isInnermost()) {
    // Outer loops are only reachable here on the VPlan-native path.
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

    if (!canVectorizeOuterLoop()) {
      reportVectorizationFailure("Unsupported outer loop",
                                 "UnsupportedOuterLoop", ORE, TheLoop);
      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
      // outer loops.
      return false;
    }

    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
    return Result;
  }

  assert(TheLoop->isInnermost() && "Inner loop expected.");
  // Check if we can if-convert non-single-bb loops.
  unsigned NumBlocks = TheLoop->getNumBlocks();
  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check if we can vectorize the instructions and CFG in this loop.
  if (!canVectorizeInstrs()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // The (predicated) backedge-taken count must be computable, unless this is
  // a multi-exit loop whose single uncountable exit fits the supported
  // early-exit shape (checked by isVectorizableEarlyExitLoop).
  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
    if (TheLoop->getExitingBlock()) {
      // Exactly one exiting block and its count is unknown: plain uncountable
      // loop, which is unsupported.
      reportVectorizationFailure("Cannot vectorize uncountable loop",
                                 "UnsupportedUncountableLoop", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    } else {
      if (!isVectorizableEarlyExitLoop()) {
        // Reset any partial state recorded while analyzing the early exit.
        UncountableEdge = std::nullopt;
        if (DoExtraAnalysis)
          Result = false;
        else
          return false;
      }
    }
  }

  // Go over each instruction and look at memory deps.
  if (!canVectorizeMemory()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  if (Result) {
    LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                      << (LAI->getRuntimePointerChecking()->Need
                              ? " (with a runtime bound check)"
                              : "")
                      << "!\n");
  }

  // A pragma-forced loop is allowed a larger budget of runtime SCEV checks.
  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
    LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
                         "due to SCEVThreshold");
    reportVectorizationFailure("Too many SCEV checks needed",
        "Too many SCEV assumptions need to be made and checked at runtime",
        "TooManySCEVRunTimeChecks", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Okay! We've done all the tests. If any have failed, return false. Otherwise
  // we can vectorize, and at this point we don't have any other mem analysis
  // which may limit our maximum vectorization factor, so just return true with
  // no restrictions.
  return Result;
}
1878
1879boolLoopVectorizationLegality::canFoldTailByMasking() const{
1880
1881LLVM_DEBUG(dbgs() <<"LV: checking if tail can be folded by masking.\n");
1882
1883SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1884
1885for (constauto &Reduction :getReductionVars())
1886 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1887
1888// TODO: handle non-reduction outside users when tail is folded by masking.
1889for (auto *AE : AllowedExit) {
1890// Check that all users of allowed exit values are inside the loop or
1891// are the live-out of a reduction.
1892if (ReductionLiveOuts.count(AE))
1893continue;
1894for (User *U : AE->users()) {
1895Instruction *UI = cast<Instruction>(U);
1896if (TheLoop->contains(UI))
1897continue;
1898LLVM_DEBUG(
1899dbgs()
1900 <<"LV: Cannot fold tail by masking, loop has an outside user for "
1901 << *UI <<"\n");
1902returnfalse;
1903 }
1904 }
1905
1906for (constauto &Entry :getInductionVars()) {
1907PHINode *OrigPhi = Entry.first;
1908for (User *U : OrigPhi->users()) {
1909auto *UI = cast<Instruction>(U);
1910if (!TheLoop->contains(UI)) {
1911LLVM_DEBUG(dbgs() <<"LV: Cannot fold tail by masking, loop IV has an "
1912"outside user for "
1913 << *UI <<"\n");
1914returnfalse;
1915 }
1916 }
1917 }
1918
1919// The list of pointers that we can safely read and write to remains empty.
1920SmallPtrSet<Value *, 8> SafePointers;
1921
1922// Check all blocks for predication, including those that ordinarily do not
1923// need predication such as the header block.
1924SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1925for (BasicBlock *BB : TheLoop->blocks()) {
1926if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1927LLVM_DEBUG(dbgs() <<"LV: Cannot fold tail by masking.\n");
1928returnfalse;
1929 }
1930 }
1931
1932LLVM_DEBUG(dbgs() <<"LV: can fold tail by masking.\n");
1933
1934returntrue;
1935}
1936
1937voidLoopVectorizationLegality::prepareToFoldTailByMasking() {
1938// The list of pointers that we can safely read and write to remains empty.
1939SmallPtrSet<Value *, 8> SafePointers;
1940
1941// Mark all blocks for predication, including those that ordinarily do not
1942// need predication such as the header block.
1943for (BasicBlock *BB : TheLoop->blocks()) {
1944 [[maybe_unused]]bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1945assert(R &&"Must be able to predicate block when tail-folding.");
1946 }
1947}
1948
1949}// namespace llvm
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition:CommandLine.h:686
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition:Debug.h:106
Name
std::string Name
Definition:ELFObjHandler.cpp:77
DEBUG_TYPE
#define DEBUG_TYPE
Definition:GenericCycleImpl.h:31
GEP
Hexagon Common GEP
Definition:HexagonCommonGEP.cpp:170
IntrinsicInst.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition:Lint.cpp:557
Loads.h
LoopInfo.h
Reduction
loop Loop Strength Reduction
Definition:LoopStrengthReduce.cpp:7191
ForceScalableVectorization
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
LV_NAME
#define LV_NAME
Definition:LoopVectorizationLegality.cpp:34
PragmaVectorizeSCEVCheckThreshold
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition:LoopVectorizationLegality.cpp:87
AllowStridedPointerIVs
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
VectorizeSCEVCheckThreshold
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
EnableHistogramVectorization
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
EnableIfConversion
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
LoopVectorizationLegality.h
This file defines the LoopVectorizationLegality class.
LoopVectorize.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
H
#define H(x, y, z)
Definition:MD5.cpp:57
OptimizationRemarkEmitter.h
if
if(PassOpts->AAPipeline)
Definition:PassBuilderBindings.cpp:64
PatternMatch.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
visit
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
Definition:SPIRVPostLegalizer.cpp:132
ScalarEvolutionExpressions.h
SizeOpts.h
Ptr
@ Ptr
Definition:TargetLibraryInfo.cpp:77
TargetLibraryInfo.h
TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.
ValueTracking.h
VectorUtils.h
Rewriter
Virtual Register Rewriter
Definition:VirtRegMap.cpp:261
IV
static const uint32_t IV[8]
Definition:blake3_impl.h:78
Predicate
Definition:AMDGPURegBankLegalizeRules.cpp:332
T
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition:BasicBlock.cpp:489
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition:BasicBlock.cpp:168
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition:BasicBlock.h:240
llvm::BinaryOperator
Definition:InstrTypes.h:170
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition:InstrTypes.h:1341
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition:Instructions.h:1479
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition:Metadata.h:532
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition:Constants.h:83
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition:DataLayout.h:63
llvm::DenseMap
Definition:DenseMap.h:727
llvm::ElementCount
Definition:TypeSize.h:300
llvm::ElementCount::getScalable
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition:TypeSize.h:314
llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition:TypeSize.h:311
llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition:TypeSize.h:322
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition:Type.cpp:791
llvm::Function
Definition:Function.h:63
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition:Instructions.h:933
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition:IVDescriptors.h:334
llvm::InductionDescriptor::IK_FpInduction
@ IK_FpInduction
Floating point induction variable.
Definition:IVDescriptors.h:341
llvm::InductionDescriptor::IK_PtrInduction
@ IK_PtrInduction
Pointer induction var. Step = C.
Definition:IVDescriptors.h:340
llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition:IVDescriptors.h:339
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition:IVDescriptors.cpp:1513
llvm::InductionDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Definition:IVDescriptors.h:387
llvm::Instruction
Definition:Instruction.h:68
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition:LLVMContext.h:67
llvm::LoadInst
An instruction for reading from memory.
Definition:Instructions.h:176
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition:Instructions.h:255
llvm::LoopAccessInfoManager::getInfo
const LoopAccessInfo & getInfo(Loop &L)
Definition:LoopAccessAnalysis.cpp:3079
llvm::LoopAccessInfo::getDepChecker
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Definition:LoopAccessAnalysis.h:682
llvm::LoopAccessInfo::getStoresToInvariantAddresses
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
Definition:LoopAccessAnalysis.h:713
llvm::LoopAccessInfo::getReport
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
Definition:LoopAccessAnalysis.h:678
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition:LoopAccessAnalysis.h:655
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition:LoopAccessAnalysis.h:648
llvm::LoopAccessInfo::isInvariant
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
Definition:LoopAccessAnalysis.cpp:2802
llvm::LoopAccessInfo::hasLoadStoreDependenceInvolvingLoopInvariantAddress
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
Definition:LoopAccessAnalysis.h:708
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition:LoopAccessAnalysis.h:722
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition:LoopAccessAnalysis.cpp:2773
llvm::LoopAccessInfo::getSymbolicStrides
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
Definition:LoopAccessAnalysis.h:693
llvm::LoopAccessInfo::hasStoreStoreDependenceInvolvingLoopInvariantAddress
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
Definition:LoopAccessAnalysis.h:702
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition:GenericLoopInfo.h:124
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition:GenericLoopInfoImpl.h:256
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition:GenericLoopInfo.h:167
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition:GenericLoopInfo.h:187
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition:GenericLoopInfo.h:248
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition:GenericLoopInfoImpl.h:33
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition:GenericLoopInfo.h:90
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition:GenericLoopInfo.h:180
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition:GenericLoopInfoImpl.h:210
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition:GenericLoopInfoImpl.h:48
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition:GenericLoopInfo.h:227
llvm::LoopInfoBase::isLoopHeader
bool isLoopHeader(const BlockT *BB) const
Definition:GenericLoopInfo.h:619
llvm::LoopVectorizationLegality::isInvariantStoreOfReduction
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
Definition:LoopVectorizationLegality.cpp:1337
llvm::LoopVectorizationLegality::isInvariantAddressOfReduction
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
Definition:LoopVectorizationLegality.cpp:1344
llvm::LoopVectorizationLegality::blockNeedsPredication
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition:LoopVectorizationLegality.cpp:1402
llvm::LoopVectorizationLegality::canVectorize
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition:LoopVectorizationLegality.cpp:1762
llvm::LoopVectorizationLegality::isConsecutivePtr
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
Definition:LoopVectorizationLegality.cpp:454
llvm::LoopVectorizationLegality::canVectorizeFPMath
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
Definition:LoopVectorizationLegality.cpp:1311
llvm::LoopVectorizationLegality::isFixedOrderRecurrence
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
Definition:LoopVectorizationLegality.cpp:1397
llvm::LoopVectorizationLegality::getPointerInductionDescriptor
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
Definition:LoopVectorizationLegality.cpp:1378
llvm::LoopVectorizationLegality::getIntOrFpInductionDescriptor
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
Definition:LoopVectorizationLegality.cpp:1367
llvm::LoopVectorizationLegality::isInductionPhi
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
Definition:LoopVectorizationLegality.cpp:1357
llvm::LoopVectorizationLegality::isUniform
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
Definition:LoopVectorizationLegality.cpp:567
llvm::LoopVectorizationLegality::getInductionVars
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
Definition:LoopVectorizationLegality.h:305
llvm::LoopVectorizationLegality::isInvariant
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
Definition:LoopVectorizationLegality.cpp:472
llvm::LoopVectorizationLegality::getReductionVars
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Definition:LoopVectorizationLegality.h:302
llvm::LoopVectorizationLegality::canFoldTailByMasking
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
Definition:LoopVectorizationLegality.cpp:1879
llvm::LoopVectorizationLegality::prepareToFoldTailByMasking
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
Definition:LoopVectorizationLegality.cpp:1937
llvm::LoopVectorizationLegality::hasUncountableEarlyExit
bool hasUncountableEarlyExit() const
Returns true if the loop has exactly one uncountable early exit, i.e.
Definition:LoopVectorizationLegality.h:394
llvm::LoopVectorizationLegality::isUniformMemOp
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
Definition:LoopVectorizationLegality.cpp:600
llvm::LoopVectorizationLegality::getUncountableEarlyExitingBlock
BasicBlock * getUncountableEarlyExitingBlock() const
Returns the uncountable early exiting block, if there is exactly one.
Definition:LoopVectorizationLegality.h:399
llvm::LoopVectorizationLegality::isInductionVariable
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
Definition:LoopVectorizationLegality.cpp:1393
llvm::LoopVectorizationLegality::isCastedInductionVariable
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition:LoopVectorizationLegality.cpp:1387
llvm::LoopVectorizationRequirements::getExactFPInst
Instruction * getExactFPInst()
Definition:LoopVectorizationLegality.h:221
llvm::LoopVectorizationRequirements::addExactFPMathInst
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Definition:LoopVectorizationLegality.h:216
llvm::LoopVectorizeHints::ScalableForceKind
ScalableForceKind
Definition:LoopVectorizationLegality.h:113
llvm::LoopVectorizeHints::SK_PreferScalable
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
Definition:LoopVectorizationLegality.h:121
llvm::LoopVectorizeHints::SK_Unspecified
@ SK_Unspecified
Not selected.
Definition:LoopVectorizationLegality.h:115
llvm::LoopVectorizeHints::SK_FixedWidthOnly
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
Definition:LoopVectorizationLegality.h:117
llvm::LoopVectorizeHints::getForce
enum ForceKind getForce() const
Definition:LoopVectorizationLegality.h:153
llvm::LoopVectorizeHints::allowVectorization
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition:LoopVectorizationLegality.cpp:182
llvm::LoopVectorizeHints::allowReordering
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
Definition:LoopVectorizationLegality.cpp:250
llvm::LoopVectorizeHints::emitRemarkWithHints
void emitRemarkWithHints() const
Dumps all the hint information.
Definition:LoopVectorizationLegality.cpp:215
llvm::LoopVectorizeHints::getWidth
ElementCount getWidth() const
Definition:LoopVectorizationLegality.h:137
llvm::LoopVectorizeHints::FK_Enabled
@ FK_Enabled
Forcing enabled.
Definition:LoopVectorizationLegality.h:110
llvm::LoopVectorizeHints::FK_Undefined
@ FK_Undefined
Not selected.
Definition:LoopVectorizationLegality.h:108
llvm::LoopVectorizeHints::FK_Disabled
@ FK_Disabled
Forcing disabled.
Definition:LoopVectorizationLegality.h:109
llvm::LoopVectorizeHints::setAlreadyVectorized
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition:LoopVectorizationLegality.cpp:163
llvm::LoopVectorizeHints::LoopVectorizeHints
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Definition:LoopVectorizationLegality.cpp:107
llvm::LoopVectorizeHints::vectorizeAnalysisPassName
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition:LoopVectorizationLegality.cpp:240
llvm::LoopVectorizeHints::getInterleave
unsigned getInterleave() const
Definition:LoopVectorizationLegality.h:142
llvm::LoopVectorizeHints::getIsVectorized
unsigned getIsVectorized() const
Definition:LoopVectorizationLegality.h:151
llvm::Loop
Represents a single loop in the control flow graph.
Definition:LoopInfo.h:39
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition:LoopInfo.cpp:632
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition:LoopInfo.cpp:61
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition:LoopInfo.cpp:526
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition:LoopInfo.cpp:151
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition:LoopInfo.cpp:502
llvm::MDNode
Metadata node.
Definition:Metadata.h:1073
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition:Metadata.h:1434
llvm::MDNode::operands
ArrayRef< MDOperand > operands() const
Definition:Metadata.h:1432
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition:Metadata.h:1549
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition:Metadata.h:1440
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition:Metadata.h:895
llvm::MDString
A single uniqued string.
Definition:Metadata.h:724
llvm::MDString::getString
StringRef getString() const
Definition:Metadata.cpp:616
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition:Metadata.cpp:606
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition:MapVector.h:165
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition:MapVector.h:167
llvm::MapVector::empty
bool empty() const
Definition:MapVector.h:79
llvm::MapVector::size
size_type size() const
Definition:MapVector.h:60
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition:LoopAccessAnalysis.h:90
llvm::MemoryDepChecker::getDependences
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Definition:LoopAccessAnalysis.h:229
llvm::MemoryDepChecker::VectorizationSafetyStatus::Unsafe
@ Unsafe
llvm::Metadata
Root of the metadata hierarchy.
Definition:Metadata.h:62
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition:DiagnosticInfo.h:853
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition:DiagnosticInfo.h:893
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition:OptimizationRemarkEmitter.h:32
llvm::OptimizationRemarkEmitter::allowExtraAnalysis
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
Definition:OptimizationRemarkEmitter.h:97
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition:OptimizationRemarkEmitter.cpp:79
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition:DiagnosticInfo.h:807
llvm::PHINode
Definition:Instructions.h:2600
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition:ScalarEvolution.h:2383
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition:ScalarEvolution.cpp:15161
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition:ScalarEvolution.h:2422
llvm::PredicatedScalarEvolution::getPredicate
const SCEVPredicate & getPredicate() const
Definition:ScalarEvolution.cpp:15171
llvm::PredicatedScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
Definition:ScalarEvolution.cpp:15130
llvm::PredicatedScalarEvolution::getSymbolicMaxBackedgeTakenCount
const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
Definition:ScalarEvolution.cpp:15140
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition:ScalarEvolution.cpp:15111
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition:IVDescriptors.h:77
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition:IVDescriptors.h:225
llvm::RecurrenceDescriptor::isFixedOrderRecurrence
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
Definition:IVDescriptors.cpp:1042
llvm::RecurrenceDescriptor::hasExactFPMath
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Definition:IVDescriptors.h:222
llvm::RecurrenceDescriptor::getLoopExitInstr
Instruction * getLoopExitInstr() const
Definition:IVDescriptors.h:218
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition:IVDescriptors.cpp:924
llvm::RecurrenceDescriptor::isOrdered
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
Definition:IVDescriptors.h:288
llvm::RecurrenceDescriptor::IntermediateStore
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
Definition:IVDescriptors.h:304
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition:LoopAccessAnalysis.h:554
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition:ScalarEvolutionExpressions.h:347
llvm::SCEVAddRecExpr::getType
Type * getType() const
Definition:ScalarEvolutionExpressions.h:357
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition:ScalarEvolutionExpressions.h:358
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition:ScalarEvolutionExpressions.h:365
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition:ScalarEvolutionExpressions.h:359
llvm::SCEVPredicate::getComplexity
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
Definition:ScalarEvolution.h:237
llvm::SCEVPredicate::isAlwaysTrue
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
llvm::SCEVRewriteVisitor
This visitor recursively visits a SCEV expression and re-writes it.
Definition:ScalarEvolutionExpressions.h:747
llvm::SCEVRewriteVisitor::visit
const SCEV * visit(const SCEV *S)
Definition:ScalarEvolutionExpressions.h:760
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition:ScalarEvolutionExpressions.h:577
llvm::SCEV
This class represents an analyzed expression in the program.
Definition:ScalarEvolution.h:71
llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition:ScalarEvolution.h:127
llvm::ScalarEvolution
The main scalar evolution driver.
Definition:ScalarEvolution.h:447
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition:ScalarEvolution.cpp:4547
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition:ScalarEvolution.cpp:14100
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition:ScalarEvolution.cpp:4441
llvm::ScalarEvolution::getPredicatedExitCount
const SCEV * getPredicatedExitCount(const Loop *L, const BasicBlock *ExitingBlock, SmallVectorImpl< const SCEVPredicate * > *Predicates, ExitCountKind Kind=Exact)
Same as above except this uses the predicated backedge taken info and may require predicates.
Definition:ScalarEvolution.cpp:8328
llvm::ScalarEvolution::getCouldNotCompute
const SCEV * getCouldNotCompute()
Definition:ScalarEvolution.cpp:4487
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition:SmallPtrSet.h:94
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition:SmallPtrSet.h:363
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition:SmallPtrSet.h:452
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition:SmallPtrSet.h:384
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition:SmallPtrSet.h:519
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorImpl::clear
void clear()
Definition:SmallVector.h:610
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition:SmallVector.h:267
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::StoreInst
An instruction for storing to memory.
Definition:Instructions.h:292
llvm::StoreInst::getPointerOperand
Value * getPointerOperand()
Definition:Instructions.h:381
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::npos
static constexpr size_t npos
Definition:StringRef.h:53
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition:TargetLibraryInfo.h:280
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition:TargetLibraryInfo.h:407
llvm::TargetLibraryInfo::getWidestVF
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
Definition:TargetLibraryInfo.h:599
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition:TargetLibraryInfo.h:345
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition:TargetLibraryInfo.h:390
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition:TargetTransformInfo.h:212
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition:TargetTransformInfo.cpp:481
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition:TargetTransformInfo.cpp:476
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition:TargetTransformInfo.cpp:1420
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition:Twine.h:81
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition:Twine.cpp:17
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition:Type.h:270
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition:Type.h:264
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition:Type.h:128
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition:Type.h:184
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::User
Definition:User.h:44
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition:User.h:228
llvm::VFDatabase::hasMaskedVariant
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition:VectorUtils.h:83
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition:VectorUtils.h:72
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::users
iterator_range< user_iterator > users()
Definition:Value.h:421
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition:Value.cpp:309
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLE
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition:TypeSize.h:232
llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition:TypeSize.h:171
llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition:TypeSize.h:168
llvm::details::FixedOrScalableQuantity::isZero
constexpr bool isZero() const
Definition:TypeSize.h:156
llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition:ilist_node.h:32
unsigned
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition:AMDGPUMetadata.h:395
llvm::ARM_MB::LD
@ LD
Definition:ARMBaseInfo.h:72
llvm::ARM_MB::ST
@ ST
Definition:ARMBaseInfo.h:73
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition:CallingConv.h:24
llvm::PatternMatch::m_Store
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
Definition:PatternMatch.h:1930
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1102
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition:PatternMatch.h:100
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition:PatternMatch.h:49
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition:PatternMatch.h:826
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition:PatternMatch.h:885
llvm::PatternMatch::m_ZExtOrSExtOrSelf
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
Definition:PatternMatch.h:2146
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition:PatternMatch.h:1923
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition:PatternMatch.h:92
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1114
llvm::SIEncodingFamily::SI
@ SI
Definition:SIDefines.h:36
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition:CommandLine.h:711
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition:RDFGraph.h:390
llvm::rdf::Func
NodeAddr< FuncNode * > Func
Definition:RDFGraph.h:393
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition:STLExtras.h:329
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1739
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition:STLExtras.h:1697
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition:VectorUtils.cpp:209
llvm::HintsAllowReordering
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
llvm::getWiderType
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition:LoopVectorizationLegality.cpp:410
llvm::LibFunc
LibFunc
Definition:TargetLibraryInfo.h:68
llvm::successors
auto successors(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1376
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition:Instructions.h:4984
llvm::convertPointerToIntegerType
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
Definition:LoopVectorizationLegality.cpp:398
llvm::isUniformLoopNest
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition:LoopVectorizationLegality.cpp:386
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition:MachineSizeOpts.cpp:27
llvm::isUniformLoop
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition:LoopVectorizationLegality.cpp:341
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition:Loads.cpp:378
llvm::canWidenCallReturnType
static bool canWidenCallReturnType(Type *Ty)
Returns true if the call return type Ty can be widened by the loop vectorizer.
Definition:LoopVectorizationLegality.cpp:783
llvm::isDereferenceableReadOnlyLoop
bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if the loop L cannot fault on any iteration and only contains read-only memory accesses.
Definition:Loads.cpp:817
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1746
llvm::reverse
auto reverse(ContainerTy &&C)
Definition:STLExtras.h:420
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition:MathExtras.h:292
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition:Debug.cpp:163
llvm::getPtrStride
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition:LoopAccessAnalysis.cpp:1435
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition:ValueTracking.cpp:7043
llvm::hasOutsideLoopUser
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition:LoopVectorizationLegality.cpp:420
llvm::storeToSameAddress
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
Definition:LoopVectorizationLegality.cpp:438
llvm::canVectorizeTy
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
Definition:VectorTypeUtils.h:84
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition:VectorUtils.cpp:134
llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition:LoopVectorize.cpp:887
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition:LoopInfo.cpp:1170
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition:STLExtras.h:2099
llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1377
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition:STLExtras.h:1903
llvm::findHistogram
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
Definition:LoopVectorizationLegality.cpp:1093
llvm::isTLIScalarize
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition:LoopVectorizationLegality.cpp:761
llvm::isDereferenceableAndAlignedInLoop
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition:Loads.cpp:278
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::SCEVExprContains
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Definition:ScalarEvolutionExpressions.h:720
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition:LoopAccessAnalysis.h:111
llvm::MemoryDepChecker::Dependence::getDestination
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Definition:LoopAccessAnalysis.h:924
llvm::MemoryDepChecker::Dependence::getSource
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
Definition:LoopAccessAnalysis.h:919
llvm::MemoryDepChecker::Dependence::IndirectUnsafe
@ IndirectUnsafe
Definition:LoopAccessAnalysis.h:122
llvm::MemoryDepChecker::Dependence::isSafeForVectorization
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition:LoopAccessAnalysis.cpp:1673
llvm::SCEVCouldNotCompute
An object of this class is returned by queries that could not be answered.
Definition:ScalarEvolution.h:205
llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition:LoopVectorizationPlanner.h:292
llvm::VectorizerParams
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
Definition:LoopAccessAnalysis.h:33
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition:LoopAccessAnalysis.h:35
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition:LoopAccessAnalysis.cpp:150
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition:LoopAccessAnalysis.h:40
llvm::cl::desc
Definition:CommandLine.h:409

Generated on Fri Jul 18 2025 16:46:37 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp