Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 24b87b8

Browse files
committed
[VPlan] Skip cost verification for loops with EVL gather/scatter.
The VPlan-based cost model uses vp_gather/vp_scatter for gather/scatter costs, which is different from the legacy cost model and cannot be matched there. Don't verify that the costs match for plans containing gather/scatters with EVL. Fixes #169948.
1 parent 9ffd2e4, commit 24b87b8

File tree

2 files changed

+139
-11
lines changed

2 files changed

+139
-11
lines changed

‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7187,17 +7187,29 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71877187
VPCostContextCostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
71887188
*CM.PSE.getSE(), OrigLoop);
71897189
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
7190-
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
7191-
// with early exits and plans with additional VPlan simplifications. The
7192-
// legacy cost model doesn't properly model costs for such loops.
7193-
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7194-
!Legal->getLAI()->getSymbolicStrides().empty() ||
7195-
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
7196-
CostCtx, OrigLoop,
7197-
BestFactor.Width) ||
7198-
planContainsAdditionalSimplifications(
7199-
getPlanFor(LegacyVF.Width), CostCtx, OrigLoop, LegacyVF.Width)) &&
7200-
" VPlan cost model and legacy cost model disagreed");
7190+
// Verify that the VPlan-based and legacy cost models agree, except for
7191+
// * VPlans with early exits,
7192+
// * VPlans with additional VPlan simplifications,
7193+
// * EVL-based VPlans with gather/scatters (the VPlan-based cost model uses
7194+
// vp_scatter/vp_gather).
7195+
// The legacy cost model doesn't properly model costs for such loops.
7196+
bool UsesEVLGatherScatter =
7197+
any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(
7198+
BestPlan.getVectorLoopRegion()->getEntry())),
7199+
[](VPBasicBlock *VPBB) {
7200+
returnany_of(*VPBB, [](VPRecipeBase &R) {
7201+
return isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(&R) &&
7202+
!cast<VPWidenMemoryRecipe>(&R)->isConsecutive();
7203+
});
7204+
});
7205+
assert(
7206+
(BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7207+
!Legal->getLAI()->getSymbolicStrides().empty() || UsesEVLGatherScatter ||
7208+
planContainsAdditionalSimplifications(
7209+
getPlanFor(BestFactor.Width), CostCtx, OrigLoop, BestFactor.Width) ||
7210+
planContainsAdditionalSimplifications(
7211+
getPlanFor(LegacyVF.Width), CostCtx, OrigLoop, LegacyVF.Width)) &&
7212+
" VPlan cost model and legacy cost model disagreed");
72017213
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost >0) &&
72027214
"when vectorizing, the scalar cost must be computed.");
72037215
#endif

‎llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll‎

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,3 +219,119 @@ loop:
219219
exit:
220220
retvoid
221221
}
222+
223+
; Test for https://github.com/llvm/llvm-project/issues/169948.
224+
definei8@mixed_gather_scatters(ptr%A,ptr%B,ptr%C) #0 {
225+
; RVA23-LABEL: @mixed_gather_scatters(
226+
; RVA23-NEXT: entry:
227+
; RVA23-NEXT: br label [[VECTOR_PH:%.*]]
228+
; RVA23: vector.ph:
229+
; RVA23-NEXT: br label [[VECTOR_BODY:%.*]]
230+
; RVA23: vector.body:
231+
; RVA23-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
232+
; RVA23-NEXT: [[AVL:%.*]] = phi i32 [ 10, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
233+
; RVA23-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
234+
; RVA23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A:%.*]], align 8
235+
; RVA23-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP1]], i64 0
236+
; RVA23-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
237+
; RVA23-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
238+
; RVA23-NEXT: [[TMP2:%.*]] = icmp sgt <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], zeroinitializer
239+
; RVA23-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i1> [[TMP2]] to <vscale x 2 x i8>
240+
; RVA23-NEXT: [[TMP4:%.*]] = or <vscale x 2 x i8> [[VEC_PHI]], [[TMP3]]
241+
; RVA23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B:%.*]], align 8
242+
; RVA23-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP5]], i64 0
243+
; RVA23-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
244+
; RVA23-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT2]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
245+
; RVA23-NEXT: [[TMP6:%.*]] = icmp sgt <vscale x 2 x i64> [[WIDE_MASKED_GATHER3]], zeroinitializer
246+
; RVA23-NEXT: [[TMP7:%.*]] = zext <vscale x 2 x i1> [[TMP6]] to <vscale x 2 x i8>
247+
; RVA23-NEXT: [[TMP8:%.*]] = or <vscale x 2 x i8> [[TMP4]], [[TMP7]]
248+
; RVA23-NEXT: [[TMP9:%.*]] = or <vscale x 2 x i8> [[TMP8]], splat (i8 1)
249+
; RVA23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C:%.*]], align 8
250+
; RVA23-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
251+
; RVA23-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT4]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
252+
; RVA23-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
253+
; RVA23-NEXT: [[TMP11:%.*]] = icmp sgt <vscale x 2 x i64> [[WIDE_MASKED_GATHER6]], zeroinitializer
254+
; RVA23-NEXT: [[TMP12:%.*]] = zext <vscale x 2 x i1> [[TMP11]] to <vscale x 2 x i8>
255+
; RVA23-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i8> [[TMP9]], [[TMP12]]
256+
; RVA23-NEXT: [[TMP14]] = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> [[TMP13]], <vscale x 2 x i8> [[VEC_PHI]], i32 [[TMP0]])
257+
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]]
258+
; RVA23-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
259+
; RVA23-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
260+
; RVA23: middle.block:
261+
; RVA23-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> [[TMP14]])
262+
; RVA23-NEXT: br label [[EXIT:%.*]]
263+
; RVA23: exit:
264+
; RVA23-NEXT: ret i8 [[TMP16]]
265+
;
266+
; RVA23ZVL1024B-LABEL: @mixed_gather_scatters(
267+
; RVA23ZVL1024B-NEXT: entry:
268+
; RVA23ZVL1024B-NEXT: br label [[VECTOR_PH:%.*]]
269+
; RVA23ZVL1024B: vector.ph:
270+
; RVA23ZVL1024B-NEXT: br label [[VECTOR_BODY:%.*]]
271+
; RVA23ZVL1024B: vector.body:
272+
; RVA23ZVL1024B-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
273+
; RVA23ZVL1024B-NEXT: [[AVL:%.*]] = phi i32 [ 10, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
274+
; RVA23ZVL1024B-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 1, i1 true)
275+
; RVA23ZVL1024B-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A:%.*]], align 8
276+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[TMP1]], i64 0
277+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
278+
; RVA23ZVL1024B-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP0]])
279+
; RVA23ZVL1024B-NEXT: [[TMP2:%.*]] = icmp sgt <vscale x 1 x i64> [[WIDE_MASKED_GATHER]], zeroinitializer
280+
; RVA23ZVL1024B-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
281+
; RVA23ZVL1024B-NEXT: [[TMP4:%.*]] = or <vscale x 1 x i8> [[VEC_PHI]], [[TMP3]]
282+
; RVA23ZVL1024B-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B:%.*]], align 8
283+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[TMP5]], i64 0
284+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
285+
; RVA23ZVL1024B-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 [[BROADCAST_SPLAT2]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP0]])
286+
; RVA23ZVL1024B-NEXT: [[TMP6:%.*]] = icmp sgt <vscale x 1 x i64> [[WIDE_MASKED_GATHER3]], zeroinitializer
287+
; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = zext <vscale x 1 x i1> [[TMP6]] to <vscale x 1 x i8>
288+
; RVA23ZVL1024B-NEXT: [[TMP8:%.*]] = or <vscale x 1 x i8> [[TMP4]], [[TMP7]]
289+
; RVA23ZVL1024B-NEXT: [[TMP9:%.*]] = or <vscale x 1 x i8> [[TMP8]], splat (i8 1)
290+
; RVA23ZVL1024B-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C:%.*]], align 8
291+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[TMP10]], i64 0
292+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT4]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
293+
; RVA23ZVL1024B-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 [[BROADCAST_SPLAT5]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP0]])
294+
; RVA23ZVL1024B-NEXT: [[TMP11:%.*]] = icmp sgt <vscale x 1 x i64> [[WIDE_MASKED_GATHER6]], zeroinitializer
295+
; RVA23ZVL1024B-NEXT: [[TMP12:%.*]] = zext <vscale x 1 x i1> [[TMP11]] to <vscale x 1 x i8>
296+
; RVA23ZVL1024B-NEXT: [[TMP13:%.*]] = or <vscale x 1 x i8> [[TMP9]], [[TMP12]]
297+
; RVA23ZVL1024B-NEXT: [[TMP14]] = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i8> [[TMP13]], <vscale x 1 x i8> [[VEC_PHI]], i32 [[TMP0]])
298+
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]]
299+
; RVA23ZVL1024B-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
300+
; RVA23ZVL1024B-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
301+
; RVA23ZVL1024B: middle.block:
302+
; RVA23ZVL1024B-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> [[TMP14]])
303+
; RVA23ZVL1024B-NEXT: br label [[EXIT:%.*]]
304+
; RVA23ZVL1024B: exit:
305+
; RVA23ZVL1024B-NEXT: ret i8 [[TMP16]]
306+
;
307+
entry:
308+
brlabel%loop
309+
310+
loop:
311+
%iv =phii32 [0,%entry ], [%iv.next,%loop ]
312+
%accum =phii8 [0,%entry ], [%or.4,%loop ]
313+
%ptr.0 =loadptr,ptr%A,align8
314+
%val.0 =loadi64,ptr%ptr.0,align8
315+
%cmp.0 =icmpsgti64%val.0,0
316+
%ext.0 =zexti1%cmp.0toi8
317+
%or.0 =ori8%accum,%ext.0
318+
%ptr.1 =loadptr,ptr%B,align8
319+
%val.1 =loadi64,ptr%ptr.1,align8
320+
%cmp.1 =icmpsgti64%val.1,0
321+
%ext.1 =zexti1%cmp.1toi8
322+
%or.1 =ori8%or.0,%ext.1
323+
%or.2 =ori8%or.1,1
324+
%ptr.4 =loadptr,ptr%C,align8
325+
%val.4 =loadi64,ptr%ptr.4,align8
326+
%cmp.4 =icmpsgti64%val.4,0
327+
%ext.4 =zexti1%cmp.4toi8
328+
%or.4 =ori8%or.2,%ext.4
329+
%iv.next =addi32%iv,1
330+
%exitcond =icmpeqi32%iv,9
331+
bri1%exitcond,label%exit,label%loop
332+
333+
exit:
334+
reti8%or.4
335+
}
336+
337+
attributes #0 = {"target-features"="+zve64x,+zvl256b" }

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp