Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit004c67e

Browse files
authored
[LV] Vectorize maxnum/minnum w/o fast-math flags. (#148239)
Update LV to vectorize maxnum/minnum reductions without fast-math flags, by adding an extra check in the loop if any inputs to maxnum/minnum are NaN, due to maxnum/minnum behavior w.r.t. signaling NaNs. Signed zeros are already handled consistently by maxnum/minnum. If any input is NaN, * exit the vector loop, * compute the reduction result up to the vector iteration that contained NaN inputs, and * resume in the scalar loop. New recurrence kinds are added for reductions using maxnum/minnum without fast-math flags. PR: #148239
1 parent695660c commit004c67e

16 files changed

+731
-58
lines changed

‎llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ enum class RecurKind {
4747
FMul,///< Product of floats.
4848
FMin,///< FP min implemented in terms of select(cmp()).
4949
FMax,///< FP max implemented in terms of select(cmp()).
50+
FMinNum,///< FP min with llvm.minnum semantics including NaNs.
51+
FMaxNum,///< FP max with llvm.maxnum semantics including NaNs.
5052
FMinimum,///< FP min with llvm.minimum semantics
5153
FMaximum,///< FP max with llvm.maximum semantics
5254
FMinimumNum,///< FP min with llvm.minimumnum semantics
@@ -250,6 +252,7 @@ class RecurrenceDescriptor {
250252
/// Returns true if the recurrence kind is a floating-point min/max kind.
251253
staticboolisFPMinMaxRecurrenceKind(RecurKind Kind) {
252254
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
255+
Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
253256
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
254257
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
255258
}

‎llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
941941
m_Intrinsic<Intrinsic::minimumnum>(m_Value(),m_Value())) ||
942942
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(),m_Value()));
943943
};
944-
if (isIntMinMaxRecurrenceKind(Kind) ||
945-
(HasRequiredFMF() &&isFPMinMaxRecurrenceKind(Kind)))
944+
if (isIntMinMaxRecurrenceKind(Kind))
946945
returnisMinMaxPattern(I, Kind, Prev);
947-
elseif (isFMulAddIntrinsic(I))
946+
if (isFPMinMaxRecurrenceKind(Kind)) {
947+
InstDesc Res =isMinMaxPattern(I, Kind, Prev);
948+
if (!Res.isRecurrence())
949+
returnInstDesc(false, I);
950+
if (HasRequiredFMF())
951+
return Res;
952+
// We may be able to vectorize FMax/FMin reductions using maxnum/minnum
953+
// intrinsics with extra checks ensuring the vector loop handles only
954+
// non-NaN inputs.
955+
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(),m_Value()))) {
956+
assert(Kind == RecurKind::FMax &&
957+
"unexpected recurrence kind for maxnum");
958+
returnInstDesc(I, RecurKind::FMaxNum);
959+
}
960+
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(),m_Value()))) {
961+
assert(Kind == RecurKind::FMin &&
962+
"unexpected recurrence kind for minnum");
963+
returnInstDesc(I, RecurKind::FMinNum);
964+
}
965+
returnInstDesc(false, I);
966+
}
967+
if (isFMulAddIntrinsic(I))
948968
returnInstDesc(Kind == RecurKind::FMulAdd, I,
949969
I->hasAllowReassoc() ?nullptr : I);
950970
returnInstDesc(false, I);

‎llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
938938
case RecurKind::UMin:
939939
return Intrinsic::vector_reduce_umin;
940940
case RecurKind::FMax:
941+
case RecurKind::FMaxNum:
941942
return Intrinsic::vector_reduce_fmax;
942943
case RecurKind::FMin:
944+
case RecurKind::FMinNum:
943945
return Intrinsic::vector_reduce_fmin;
944946
case RecurKind::FMaximum:
945947
return Intrinsic::vector_reduce_fmaximum;
@@ -1037,8 +1039,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
10371039
case RecurKind::SMax:
10381040
return Intrinsic::smax;
10391041
case RecurKind::FMin:
1042+
case RecurKind::FMinNum:
10401043
return Intrinsic::minnum;
10411044
case RecurKind::FMax:
1045+
case RecurKind::FMaxNum:
10421046
return Intrinsic::maxnum;
10431047
case RecurKind::FMinimum:
10441048
return Intrinsic::minimum;
@@ -1096,9 +1100,9 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
10961100
Value *Right) {
10971101
Type *Ty = Left->getType();
10981102
if (Ty->isIntOrIntVectorTy() ||
1099-
(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
1103+
(RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
1104+
RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
11001105
RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
1101-
// TODO: Add float minnum/maxnum support when FMF nnan is set.
11021106
Intrinsic::ID Id =getMinMaxReductionIntrinsicOp(RK);
11031107
return Builder.CreateIntrinsic(Ty, Id, {Left, Right},nullptr,
11041108
"rdx.minmax");
@@ -1308,6 +1312,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
13081312
case RecurKind::UMin:
13091313
case RecurKind::FMax:
13101314
case RecurKind::FMin:
1315+
case RecurKind::FMinNum:
1316+
case RecurKind::FMaxNum:
13111317
case RecurKind::FMinimum:
13121318
case RecurKind::FMaximum:
13131319
case RecurKind::FMinimumNum:

‎llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ class VPBuilder {
230230

231231
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
232232
/// and \p B.
233-
/// TODO: add createFCmp when needed.
234233
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
235234
DebugLoc DL = DebugLoc::getUnknown(),
236235
const Twine &Name = "") {
@@ -240,6 +239,17 @@ class VPBuilder {
240239
newVPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
241240
}
242241

242+
/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
243+
/// and \p B.
244+
VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
245+
DebugLoc DL = DebugLoc::getUnknown(),
246+
const Twine &Name = "") {
247+
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
248+
Pred <= CmpInst::LAST_FCMP_PREDICATE &&"invalid predicate");
249+
returntryInsertInstruction(
250+
newVPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
251+
}
252+
243253
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
244254
DebugLoc DL = DebugLoc::getUnknown(),
245255
const Twine &Name = "") {

‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4361,10 +4361,14 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43614361

43624362
boolLoopVectorizationPlanner::isCandidateForEpilogueVectorization(
43634363
ElementCount VF)const {
4364-
// Cross iteration phis such as reductions need special handling and are
4365-
// currently unsupported.
4366-
if (any_of(OrigLoop->getHeader()->phis(),
4367-
[&](PHINode &Phi) {return Legal->isFixedOrderRecurrence(&Phi); }))
4364+
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
4365+
// reductions need special handling and are currently unsupported.
4366+
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
4367+
if (!Legal->isReductionVariable(&Phi))
4368+
return Legal->isFixedOrderRecurrence(&Phi);
4369+
RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4370+
return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
4371+
}))
43684372
returnfalse;
43694373

43704374
// Phis with uses outside of the loop require special handling and are
@@ -8787,6 +8791,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
87878791
// Adjust the recipes for any inloop reductions.
87888792
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
87898793

8794+
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
8795+
// NaNs if possible, bail out otherwise.
8796+
if (!VPlanTransforms::runPass(
8797+
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
8798+
returnnullptr;
8799+
87908800
// Transform recipes to abstract recipes if it is legal and beneficial and
87918801
// clamp the range for better cost estimation.
87928802
// TODO: Enable following transform when the EVL-version of extended-reduction

‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23202,6 +23202,8 @@ class HorizontalReduction {
2320223202
case RecurKind::FindFirstIVUMin:
2320323203
case RecurKind::FindLastIVSMax:
2320423204
case RecurKind::FindLastIVUMax:
23205+
case RecurKind::FMaxNum:
23206+
case RecurKind::FMinNum:
2320523207
case RecurKind::FMaximumNum:
2320623208
case RecurKind::FMinimumNum:
2320723209
case RecurKind::None:
@@ -23339,6 +23341,8 @@ class HorizontalReduction {
2333923341
case RecurKind::FindFirstIVUMin:
2334023342
case RecurKind::FindLastIVSMax:
2334123343
case RecurKind::FindLastIVUMax:
23344+
case RecurKind::FMaxNum:
23345+
case RecurKind::FMinNum:
2334223346
case RecurKind::FMaximumNum:
2334323347
case RecurKind::FMinimumNum:
2334423348
case RecurKind::None:
@@ -23441,6 +23445,8 @@ class HorizontalReduction {
2344123445
case RecurKind::FindFirstIVUMin:
2344223446
case RecurKind::FindLastIVSMax:
2344323447
case RecurKind::FindLastIVUMax:
23448+
case RecurKind::FMaxNum:
23449+
case RecurKind::FMinNum:
2344423450
case RecurKind::FMaximumNum:
2344523451
case RecurKind::FMinimumNum:
2344623452
case RecurKind::None:

‎llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
8484
return ResTy;
8585
}
8686
case Instruction::ICmp:
87+
case Instruction::FCmp:
8788
case VPInstruction::ActiveLaneMask:
8889
assert(inferScalarType(R->getOperand(0)) ==
8990
inferScalarType(R->getOperand(1)) &&

‎llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,3 +652,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
652652
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
653653
}
654654
}
655+
656+
boolVPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
657+
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
658+
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
659+
RedPhiR->getBackedgeValue()->getDefiningRecipe());
660+
if (!MinMaxR)
661+
returnnullptr;
662+
663+
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
664+
if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
665+
!(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
666+
returnnullptr;
667+
668+
#ifndef NDEBUG
669+
Intrinsic::ID RdxIntrinsicId =
670+
RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
671+
: Intrinsic::minnum;
672+
assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
673+
cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
674+
RdxIntrinsicId) ||
675+
(RepR &&
676+
cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() ==
677+
RdxIntrinsicId) &&
678+
"Intrinsic did not match recurrence kind");
679+
#endif
680+
681+
if (MinMaxR->getOperand(0) == RedPhiR)
682+
return MinMaxR->getOperand(1);
683+
684+
assert(MinMaxR->getOperand(1) == RedPhiR &&
685+
"Reduction phi operand expected");
686+
return MinMaxR->getOperand(0);
687+
};
688+
689+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
690+
VPReductionPHIRecipe *RedPhiR =nullptr;
691+
bool HasUnsupportedPhi =false;
692+
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
693+
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
694+
continue;
695+
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
696+
if (!Cur) {
697+
// TODO: Also support fixed-order recurrence phis.
698+
HasUnsupportedPhi =true;
699+
continue;
700+
}
701+
// For now, only a single reduction is supported.
702+
// TODO: Support multiple MaxNum/MinNum reductions and other reductions.
703+
if (RedPhiR)
704+
returnfalse;
705+
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
706+
Cur->getRecurrenceKind() != RecurKind::FMinNum) {
707+
HasUnsupportedPhi =true;
708+
continue;
709+
}
710+
RedPhiR = Cur;
711+
}
712+
713+
if (!RedPhiR)
714+
returntrue;
715+
716+
// We won't be able to resume execution in the scalar tail, if there are
717+
// unsupported header phis or there is no scalar tail at all, due to
718+
// tail-folding.
719+
if (HasUnsupportedPhi || !Plan.hasScalarTail())
720+
returnfalse;
721+
722+
VPValue *MinMaxOp =GetMinMaxCompareValue(RedPhiR);
723+
if (!MinMaxOp)
724+
returnfalse;
725+
726+
RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
727+
assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
728+
"unsupported reduction");
729+
730+
/// Check if the vector loop of \p Plan can early exit and restart
731+
/// execution of last vector iteration in the scalar loop. This requires all
732+
/// recipes up to early exit point be side-effect free as they are
733+
/// re-executed. Currently we check that the loop is free of any recipe that
734+
/// may write to memory. Expected to operate on an early VPlan w/o nested
735+
/// regions.
736+
for (VPBlockBase *VPB :vp_depth_first_shallow(
737+
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
738+
auto *VPBB = cast<VPBasicBlock>(VPB);
739+
for (auto &R : *VPBB) {
740+
if (R.mayWriteToMemory() &&
741+
!match(&R,m_BranchOnCount(m_VPValue(),m_VPValue())))
742+
returnfalse;
743+
}
744+
}
745+
746+
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
747+
VPBuilderBuilder(LatchVPBB->getTerminator());
748+
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
749+
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
750+
"Unexpected terminator");
751+
auto *IsLatchExitTaken =
752+
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
753+
LatchExitingBranch->getOperand(1));
754+
755+
VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
756+
VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
757+
auto *AnyExitTaken =
758+
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
759+
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
760+
LatchExitingBranch->eraseFromParent();
761+
762+
// If we exit early due to NaNs, compute the final reduction result based on
763+
// the reduction phi at the beginning of the last vector iteration.
764+
auto *RdxResult = find_singleton<VPSingleDefRecipe>(
765+
RedPhiR->users(), [](VPUser *U,bool) -> VPSingleDefRecipe * {
766+
auto *VPI = dyn_cast<VPInstruction>(U);
767+
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
768+
return VPI;
769+
returnnullptr;
770+
});
771+
772+
auto *MiddleVPBB = Plan.getMiddleBlock();
773+
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
774+
auto *NewSel =
775+
Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
776+
RdxResult->setOperand(1, NewSel);
777+
778+
auto *ScalarPH = Plan.getScalarPreheader();
779+
// Update resume phis for inductions in the scalar preheader. If AnyNaN is
780+
// true, the resume from the start of the last vector iteration via the
781+
// canonical IV, otherwise from the original value.
782+
for (auto &R : ScalarPH->phis()) {
783+
auto *ResumeR = cast<VPPhi>(&R);
784+
VPValue *VecV = ResumeR->getOperand(0);
785+
if (VecV == RdxResult)
786+
continue;
787+
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
788+
if (DerivedIV->getNumUsers() ==1 &&
789+
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
790+
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
791+
&Plan.getVectorTripCount());
792+
DerivedIV->moveAfter(&*Builder.getInsertPoint());
793+
DerivedIV->setOperand(1, NewSel);
794+
continue;
795+
}
796+
}
797+
// Bail out and abandon the current, partially modified, VPlan if we
798+
// encounter resume phi that cannot be updated yet.
799+
if (VecV != &Plan.getVectorTripCount()) {
800+
LLVM_DEBUG(dbgs() <<"Found resume phi we cannot update for VPlan with"
801+
"FMaxNum/FMinNum reduction.\n");
802+
returnfalse;
803+
}
804+
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
805+
ResumeR->setOperand(0, NewSel);
806+
}
807+
808+
auto *MiddleTerm = MiddleVPBB->getTerminator();
809+
Builder.setInsertPoint(MiddleTerm);
810+
VPValue *MiddleCond = MiddleTerm->getOperand(0);
811+
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
812+
MiddleTerm->setOperand(0, NewCond);
813+
returntrue;
814+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp