- Notifications
You must be signed in to change notification settings - Fork 14.5k
[CostModel] Provide a default model for histogram intrinsics #149348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:main
Are you sure you want to change the base?
Conversation
llvmbot commented Jul 17, 2025 • edited
Loading Uh oh!
There was an error while loading. Please reload this page.
edited
Uh oh!
There was an error while loading. Please reload this page.
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-analysis Author: Graham Hunter (huntergr-arm) Changes: Since we scalarize these intrinsics when the target does not support them, we should model that for costing purposes. Patch is 42.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149348.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.hindex ddc8a5eaffa94..a56afe05f66f6 100644--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h@@ -878,9 +878,6 @@ class TargetTransformInfoImplBase { switch (ICA.getID()) { default: break;- case Intrinsic::experimental_vector_histogram_add:- // For now, we want explicit support from the target for histograms.- return InstructionCost::getInvalid(); case Intrinsic::allow_runtime_check: case Intrinsic::allow_ubsan_check: case Intrinsic::annotation:diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.hindex 1d7c41452f7d5..5d5705f82e2f7 100644--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h@@ -2110,6 +2110,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { } case Intrinsic::get_active_lane_mask: case Intrinsic::experimental_vector_match:+ case Intrinsic::experimental_vector_histogram_add: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); case Intrinsic::modf: case Intrinsic::sincos:@@ -2458,6 +2459,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind); return Cost; }+ case Intrinsic::experimental_vector_histogram_add:+ case Intrinsic::experimental_vector_histogram_uadd_sat:+ case Intrinsic::experimental_vector_histogram_umax:+ case Intrinsic::experimental_vector_histogram_umin: {+ FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[0]);+ Type *EltTy = ICA.getArgTypes()[1];++ // Targets with scalable vectors must handle this on their own.+ if (!PtrsTy)+ return InstructionCost::getInvalid();++ Align Alignment = thisT()->DL.getABITypeAlign(EltTy);+ InstructionCost Cost = 0;+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy,+ CostKind, 1, 
nullptr, nullptr);+ Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0,+ CostKind);+ switch (IID) {+ default:+ llvm_unreachable("Unhandled histogram update operation.");+ case Intrinsic::experimental_vector_histogram_add:+ Cost +=+ thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind);+ break;+ case Intrinsic::experimental_vector_histogram_uadd_sat: {+ IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy});+ Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind);+ break;+ }+ case Intrinsic::experimental_vector_histogram_umax: {+ IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy});+ Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind);+ break;+ }+ case Intrinsic::experimental_vector_histogram_umin: {+ IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy});+ Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind);+ break;+ }+ }+ Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0,+ CostKind);+ Cost *= PtrsTy->getNumElements();+ return Cost;+ } case Intrinsic::get_active_lane_mask: { Type *ArgTy = ICA.getArgTypes()[0]; EVT ResVT = getTLI()->getValueType(DL, RetTy, true);diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cppindex 90d3d92d6bbf5..5b29da919c6da 100644--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp@@ -547,7 +547,17 @@ static bool isUnpackedVectorVT(EVT VecVT) { VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock; }-static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {+static InstructionCost getHistogramCost(const AArch64Subtarget *ST,+ const IntrinsicCostAttributes &ICA) {+ // We need to know at least the number of elements in the vector of buckets+ // and the size of each element to update.+ if (ICA.getArgTypes().size() < 2)+ return InstructionCost::getInvalid();++ // Only interested in 
costing for the hardware instruction from SVE2.+ if (!ST->hasSVE2())+ return InstructionCost::getInvalid();+ Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements unsigned TotalHistCnts = 1;@@ -572,9 +582,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize; TotalHistCnts = EC / NaturalVectorWidth;++ return InstructionCost(BaseHistCntCost * TotalHistCnts); }- return InstructionCost(BaseHistCntCost * TotalHistCnts);+ return InstructionCost::getInvalid(); } InstructionCost@@ -590,10 +602,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return InstructionCost::getInvalid(); switch (ICA.getID()) {- case Intrinsic::experimental_vector_histogram_add:- if (!ST->hasSVE2())- return InstructionCost::getInvalid();- return getHistogramCost(ICA);+ case Intrinsic::experimental_vector_histogram_add: {+ InstructionCost HistCost = getHistogramCost(ST, ICA);+ // If the cost isn't valid, we may still be able to scalarize+ if (HistCost.isValid())+ return HistCost;+ break;+ } case Intrinsic::umin: case Intrinsic::umax: case Intrinsic::smin:diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.llnew file mode 100644index 0000000000000..c0489587551b0--- /dev/null+++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll@@ -0,0 +1,178 @@+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECK-NEON+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE+; RUN: opt < %s -passes="print<cost-model>" 
-cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2++define void @histograms() {+; CHECK-NEON-LABEL: 'histograms'+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x 
i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 
x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x 
ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void+;+; CHECK-SVE-LABEL: 'histograms'+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: 
Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; 
CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret void+;+; CHECK-SVE2-LABEL: 'histograms'+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Inva...[truncated] |
@llvm/pr-subscribers-llvm-transforms Author: Graham Hunter (huntergr-arm) Changes: Since we scalarize these intrinsics when the target does not support them, we should model that for costing purposes. Patch is 42.55 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/149348.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.hindex ddc8a5eaffa94..a56afe05f66f6 100644--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h@@ -878,9 +878,6 @@ class TargetTransformInfoImplBase { switch (ICA.getID()) { default: break;- case Intrinsic::experimental_vector_histogram_add:- // For now, we want explicit support from the target for histograms.- return InstructionCost::getInvalid(); case Intrinsic::allow_runtime_check: case Intrinsic::allow_ubsan_check: case Intrinsic::annotation:diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.hindex 1d7c41452f7d5..5d5705f82e2f7 100644--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h@@ -2110,6 +2110,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { } case Intrinsic::get_active_lane_mask: case Intrinsic::experimental_vector_match:+ case Intrinsic::experimental_vector_histogram_add: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); case Intrinsic::modf: case Intrinsic::sincos:@@ -2458,6 +2459,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind); return Cost; }+ case Intrinsic::experimental_vector_histogram_add:+ case Intrinsic::experimental_vector_histogram_uadd_sat:+ case Intrinsic::experimental_vector_histogram_umax:+ case Intrinsic::experimental_vector_histogram_umin: {+ FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[0]);+ Type *EltTy = ICA.getArgTypes()[1];++ // Targets with scalable vectors must handle this on their own.+ if (!PtrsTy)+ return InstructionCost::getInvalid();++ Align Alignment = thisT()->DL.getABITypeAlign(EltTy);+ InstructionCost Cost = 0;+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy,+ CostKind, 1, 
nullptr, nullptr);+ Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0,+ CostKind);+ switch (IID) {+ default:+ llvm_unreachable("Unhandled histogram update operation.");+ case Intrinsic::experimental_vector_histogram_add:+ Cost +=+ thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind);+ break;+ case Intrinsic::experimental_vector_histogram_uadd_sat: {+ IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy});+ Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind);+ break;+ }+ case Intrinsic::experimental_vector_histogram_umax: {+ IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy});+ Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind);+ break;+ }+ case Intrinsic::experimental_vector_histogram_umin: {+ IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy});+ Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind);+ break;+ }+ }+ Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0,+ CostKind);+ Cost *= PtrsTy->getNumElements();+ return Cost;+ } case Intrinsic::get_active_lane_mask: { Type *ArgTy = ICA.getArgTypes()[0]; EVT ResVT = getTLI()->getValueType(DL, RetTy, true);diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cppindex 90d3d92d6bbf5..5b29da919c6da 100644--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp@@ -547,7 +547,17 @@ static bool isUnpackedVectorVT(EVT VecVT) { VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock; }-static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {+static InstructionCost getHistogramCost(const AArch64Subtarget *ST,+ const IntrinsicCostAttributes &ICA) {+ // We need to know at least the number of elements in the vector of buckets+ // and the size of each element to update.+ if (ICA.getArgTypes().size() < 2)+ return InstructionCost::getInvalid();++ // Only interested in 
costing for the hardware instruction from SVE2.+ if (!ST->hasSVE2())+ return InstructionCost::getInvalid();+ Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements unsigned TotalHistCnts = 1;@@ -572,9 +582,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize; TotalHistCnts = EC / NaturalVectorWidth;++ return InstructionCost(BaseHistCntCost * TotalHistCnts); }- return InstructionCost(BaseHistCntCost * TotalHistCnts);+ return InstructionCost::getInvalid(); } InstructionCost@@ -590,10 +602,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return InstructionCost::getInvalid(); switch (ICA.getID()) {- case Intrinsic::experimental_vector_histogram_add:- if (!ST->hasSVE2())- return InstructionCost::getInvalid();- return getHistogramCost(ICA);+ case Intrinsic::experimental_vector_histogram_add: {+ InstructionCost HistCost = getHistogramCost(ST, ICA);+ // If the cost isn't valid, we may still be able to scalarize+ if (HistCost.isValid())+ return HistCost;+ break;+ } case Intrinsic::umin: case Intrinsic::umax: case Intrinsic::smin:diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.llnew file mode 100644index 0000000000000..c0489587551b0--- /dev/null+++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll@@ -0,0 +1,178 @@+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECK-NEON+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE+; RUN: opt < %s -passes="print<cost-model>" 
-cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2++define void @histograms() {+; CHECK-NEON-LABEL: 'histograms'+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x 
i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 
x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x 
ptr> poison, i8 1, <16 x i1> poison)+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void+;+; CHECK-SVE-LABEL: 'histograms'+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: 
Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; 
CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret void+;+; CHECK-SVE2-LABEL: 'histograms'+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)+; CHECK-SVE2-NEXT: Cost Model: Inva...[truncated] |
paschalis-mpeis left a comment • edited
Loading Uh oh!
There was an error while loading. Please reload this page.
edited
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, Graham, LGTM!
Up to you, but it may be better to get another look from the other reviewers if they are more active in vectorization / cost-modelling.
Since we scalarize these intrinsics when the target does not support them, we should model that for costing purposes.