LLVM 20.0.0git
ScalarizeMaskedMemIntrin.cpp
1//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
2// intrinsics
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass replaces masked memory intrinsics - when unsupported by the target
11 // - with a chain of basic blocks that deal with the elements one-by-one if the
12// appropriate mask bit is set.
13//
14//===----------------------------------------------------------------------===//
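//
// A quick way to exercise this pass in isolation (assuming an opt binary
// built from this tree; the new-PM pass name matches DEBUG_TYPE below) is:
//
//   opt -passes=scalarize-masked-mem-intrin -S input.ll -o output.ll
//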
15
16#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/DomTreeUpdater.h"
19#include "llvm/Analysis/TargetTransformInfo.h"
20#include "llvm/Analysis/VectorUtils.h"
21#include "llvm/IR/BasicBlock.h"
22#include "llvm/IR/Constant.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/IR/DerivedTypes.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/IntrinsicInst.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/Value.h"
33#include "llvm/InitializePasses.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/Casting.h"
36#include "llvm/Transforms/Scalar.h"
37#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38#include <cassert>
39#include <optional>
40
41 using namespace llvm;
42
43#define DEBUG_TYPE "scalarize-masked-mem-intrin"
44
45 namespace {
46
47 class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
48 public:
49   static char ID; // Pass identification, replacement for typeid
50
51   explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
52     initializeScalarizeMaskedMemIntrinLegacyPassPass(
53         *PassRegistry::getPassRegistry());
54   }
55
56   bool runOnFunction(Function &F) override;
57
58   StringRef getPassName() const override {
59     return "Scalarize Masked Memory Intrinsics";
60   }
61
62   void getAnalysisUsage(AnalysisUsage &AU) const override {
63     AU.addRequired<TargetTransformInfoWrapperPass>();
64     AU.addPreserved<DominatorTreeWrapperPass>();
65   }
66 };
67
68 } // end anonymous namespace
69
70 static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
71                           const TargetTransformInfo &TTI, const DataLayout &DL,
72                           bool HasBranchDivergence, DomTreeUpdater *DTU);
73 static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
74                              const TargetTransformInfo &TTI,
75                              const DataLayout &DL, bool HasBranchDivergence,
76                              DomTreeUpdater *DTU);
77
78 char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
79
80 INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
81                       "Scalarize unsupported masked memory intrinsics", false,
82                       false)
83 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
84 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
85 INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
86                     "Scalarize unsupported masked memory intrinsics", false,
87                     false)
88
89 FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
90   return new ScalarizeMaskedMemIntrinLegacyPass();
91 }
92
93 static bool isConstantIntVector(Value *Mask) {
94   Constant *C = dyn_cast<Constant>(Mask);
95   if (!C)
96     return false;
97
98   unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
99   for (unsigned i = 0; i != NumElts; ++i) {
100     Constant *CElt = C->getAggregateElement(i);
101     if (!CElt || !isa<ConstantInt>(CElt))
102       return false;
103   }
104
105   return true;
106 }
107
108 static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
109                                 unsigned Idx) {
110   return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
111 }
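//
// For example, with a <4 x i1> mask bitcast to an i4 scalar, lane 0 ends up
// in bit 0 on a little-endian target but in bit 3 on a big-endian one, so
// adjustForEndian(DL, 4, 0) returns 0 and 3 respectively.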
112
113// Translate a masked load intrinsic like
114// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
115// <16 x i1> %mask, <16 x i32> %passthru)
116// to a chain of basic blocks, with loading element one-by-one if
117// the appropriate mask bit is set
118//
119// %1 = bitcast i8* %addr to i32*
120// %2 = extractelement <16 x i1> %mask, i32 0
121// br i1 %2, label %cond.load, label %else
122//
123// cond.load: ; preds = %0
124// %3 = getelementptr i32* %1, i32 0
125// %4 = load i32* %3
126// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
127// br label %else
128//
129// else: ; preds = %0, %cond.load
130// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ]
131// %6 = extractelement <16 x i1> %mask, i32 1
132// br i1 %6, label %cond.load1, label %else2
133//
134// cond.load1: ; preds = %else
135// %7 = getelementptr i32* %1, i32 1
136// %8 = load i32* %7
137// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
138// br label %else2
139//
140// else2: ; preds = %else, %cond.load1
141// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
142// %10 = extractelement <16 x i1> %mask, i32 2
143// br i1 %10, label %cond.load4, label %else5
144//
145 static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
146                                 CallInst *CI, DomTreeUpdater *DTU,
147                                 bool &ModifiedDT) {
148Value *Ptr = CI->getArgOperand(0);
149Value *Alignment = CI->getArgOperand(1);
150Value *Mask = CI->getArgOperand(2);
151Value *Src0 = CI->getArgOperand(3);
152
153constAlign AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
154VectorType *VecType = cast<FixedVectorType>(CI->getType());
155
156Type *EltTy = VecType->getElementType();
157
158IRBuilder<> Builder(CI->getContext());
159Instruction *InsertPt = CI;
160BasicBlock *IfBlock = CI->getParent();
161
162 Builder.SetInsertPoint(InsertPt);
163 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
164
165// Short-cut if the mask is all-true.
166if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
167LoadInst *NewI = Builder.CreateAlignedLoad(VecType,Ptr, AlignVal);
168 NewI->copyMetadata(*CI);
169 NewI->takeName(CI);
170 CI->replaceAllUsesWith(NewI);
171 CI->eraseFromParent();
172return;
173 }
174
175// Adjust alignment for the scalar instruction.
176constAlign AdjustedAlignVal =
177commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
178unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
179
180// The result vector
181Value *VResult = Src0;
182
183if (isConstantIntVector(Mask)) {
184for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
185if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
186continue;
187Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr,Idx);
188LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
189 VResult = Builder.CreateInsertElement(VResult, Load,Idx);
190 }
191 CI->replaceAllUsesWith(VResult);
192 CI->eraseFromParent();
193return;
194 }
195
196// Optimize the case where the "masked load" is a predicated load - that is,
197// where the mask is the splat of a non-constant scalar boolean. In that case,
198 // use that splatted value as the guard on a conditional vector load.
199if (isSplatValue(Mask,/*Index=*/0)) {
200Value *Predicate = Builder.CreateExtractElement(Mask,uint64_t(0ull),
201 Mask->getName() +".first");
202Instruction *ThenTerm =
203SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
204/*BranchWeights=*/nullptr, DTU);
205
206BasicBlock *CondBlock = ThenTerm->getParent();
207 CondBlock->setName("cond.load");
208 Builder.SetInsertPoint(CondBlock->getTerminator());
209LoadInst *Load = Builder.CreateAlignedLoad(VecType,Ptr, AlignVal,
210 CI->getName() +".cond.load");
211 Load->copyMetadata(*CI);
212
213BasicBlock *PostLoad = ThenTerm->getSuccessor(0);
214 Builder.SetInsertPoint(PostLoad, PostLoad->begin());
215PHINode *Phi = Builder.CreatePHI(VecType,/*NumReservedValues=*/2);
216 Phi->addIncoming(Load, CondBlock);
217 Phi->addIncoming(Src0, IfBlock);
218 Phi->takeName(CI);
219
220 CI->replaceAllUsesWith(Phi);
221 CI->eraseFromParent();
222 ModifiedDT =true;
223return;
224 }
225// If the mask is not v1i1, use scalar bit test operations. This generates
226// better results on X86 at least. However, don't do this on GPUs and other
227// machines with divergence, as there each i1 needs a vector register.
228Value *SclrMask =nullptr;
229if (VectorWidth != 1 && !HasBranchDivergence) {
230Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
231 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy,"scalar_mask");
232 }
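  // For a <16 x i1> mask this produces an i16 scalar whose (endian-adjusted)
  // bit Idx holds mask lane Idx; each per-lane test below is then just an AND
  // with a one-bit constant followed by an icmp against zero.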
233
234   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
235     // Fill the "else" block, created in the previous iteration
236     //
237     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
238     //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
239     //  %cond = icmp ne i16 %mask_1, 0 ; br i1 %cond, label %cond.load, label %else
240     //
241     // On GPUs, use
242     //  %cond = extractelement %mask, Idx
243     // instead
244Value *Predicate;
245if (SclrMask !=nullptr) {
246Value *Mask = Builder.getInt(APInt::getOneBitSet(
247 VectorWidth,adjustForEndian(DL, VectorWidth,Idx)));
248Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
249 Builder.getIntN(VectorWidth, 0));
250 }else {
251Predicate = Builder.CreateExtractElement(Mask,Idx);
252 }
253
254// Create "cond" block
255//
256// %EltAddr = getelementptr i32* %1, i32 0
257// %Elt = load i32* %EltAddr
258// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
259//
260Instruction *ThenTerm =
261SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
262/*BranchWeights=*/nullptr, DTU);
263
264BasicBlock *CondBlock = ThenTerm->getParent();
265 CondBlock->setName("cond.load");
266
267 Builder.SetInsertPoint(CondBlock->getTerminator());
268Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr,Idx);
269LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
270Value *NewVResult = Builder.CreateInsertElement(VResult, Load,Idx);
271
272// Create "else" block, fill it in the next iteration
273BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
274 NewIfBlock->setName("else");
275BasicBlock *PrevIfBlock = IfBlock;
276 IfBlock = NewIfBlock;
277
278// Create the phi to join the new and previous value.
279 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
280PHINode *Phi = Builder.CreatePHI(VecType, 2,"res.phi.else");
281 Phi->addIncoming(NewVResult, CondBlock);
282 Phi->addIncoming(VResult, PrevIfBlock);
283 VResult = Phi;
284 }
285
286 CI->replaceAllUsesWith(VResult);
287 CI->eraseFromParent();
288
289 ModifiedDT =true;
290}
291
292// Translate a masked store intrinsic, like
293// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
294// <16 x i1> %mask)
295// to a chain of basic blocks, that stores element one-by-one if
296// the appropriate mask bit is set
297//
298// %1 = bitcast i8* %addr to i32*
299// %2 = extractelement <16 x i1> %mask, i32 0
300// br i1 %2, label %cond.store, label %else
301//
302// cond.store: ; preds = %0
303// %3 = extractelement <16 x i32> %val, i32 0
304// %4 = getelementptr i32* %1, i32 0
305// store i32 %3, i32* %4
306// br label %else
307//
308// else: ; preds = %0, %cond.store
309// %5 = extractelement <16 x i1> %mask, i32 1
310// br i1 %5, label %cond.store1, label %else2
311//
312// cond.store1: ; preds = %else
313// %6 = extractelement <16 x i32> %val, i32 1
314// %7 = getelementptr i32* %1, i32 1
315// store i32 %6, i32* %7
316// br label %else2
317// . . .
318 static void scalarizeMaskedStore(const DataLayout &DL, bool HasBranchDivergence,
319                                  CallInst *CI, DomTreeUpdater *DTU,
320                                  bool &ModifiedDT) {
321Value *Src = CI->getArgOperand(0);
322Value *Ptr = CI->getArgOperand(1);
323Value *Alignment = CI->getArgOperand(2);
324Value *Mask = CI->getArgOperand(3);
325
326constAlign AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
327auto *VecType = cast<VectorType>(Src->getType());
328
329Type *EltTy = VecType->getElementType();
330
331IRBuilder<> Builder(CI->getContext());
332Instruction *InsertPt = CI;
333 Builder.SetInsertPoint(InsertPt);
334 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
335
336// Short-cut if the mask is all-true.
337if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
338StoreInst *Store = Builder.CreateAlignedStore(Src,Ptr, AlignVal);
339 Store->takeName(CI);
340 Store->copyMetadata(*CI);
341 CI->eraseFromParent();
342return;
343 }
344
345// Adjust alignment for the scalar instruction.
346constAlign AdjustedAlignVal =
347commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
348unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
349
350if (isConstantIntVector(Mask)) {
351for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
352if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
353continue;
354Value *OneElt = Builder.CreateExtractElement(Src,Idx);
355Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr,Idx);
356 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
357 }
358 CI->eraseFromParent();
359return;
360 }
361
362// Optimize the case where the "masked store" is a predicated store - that is,
363// when the mask is the splat of a non-constant scalar boolean. In that case,
364// optimize to a conditional store.
365if (isSplatValue(Mask,/*Index=*/0)) {
366Value *Predicate = Builder.CreateExtractElement(Mask,uint64_t(0ull),
367 Mask->getName() +".first");
368Instruction *ThenTerm =
369SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
370/*BranchWeights=*/nullptr, DTU);
371BasicBlock *CondBlock = ThenTerm->getParent();
372 CondBlock->setName("cond.store");
373 Builder.SetInsertPoint(CondBlock->getTerminator());
374
375StoreInst *Store = Builder.CreateAlignedStore(Src,Ptr, AlignVal);
376 Store->takeName(CI);
377 Store->copyMetadata(*CI);
378
379 CI->eraseFromParent();
380 ModifiedDT =true;
381return;
382 }
383
384// If the mask is not v1i1, use scalar bit test operations. This generates
385// better results on X86 at least. However, don't do this on GPUs or other
386// machines with branch divergence, as there each i1 takes up a register.
387Value *SclrMask =nullptr;
388if (VectorWidth != 1 && !HasBranchDivergence) {
389Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
390 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy,"scalar_mask");
391 }
392
393for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
394// Fill the "else" block, created in the previous iteration
395//
396// %mask_1 = and i16 %scalar_mask, i32 1 << Idx
397// %cond = icmp ne i16 %mask_1, 0
398// br i1 %mask_1, label %cond.store, label %else
399//
400// On GPUs, use
401 //  %cond = extractelement %mask, Idx
402// instead
403Value *Predicate;
404if (SclrMask !=nullptr) {
405Value *Mask = Builder.getInt(APInt::getOneBitSet(
406 VectorWidth,adjustForEndian(DL, VectorWidth,Idx)));
407Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
408 Builder.getIntN(VectorWidth, 0));
409 }else {
410Predicate = Builder.CreateExtractElement(Mask,Idx);
411 }
412
413// Create "cond" block
414//
415// %OneElt = extractelement <16 x i32> %Src, i32 Idx
416// %EltAddr = getelementptr i32* %1, i32 0
417// %store i32 %OneElt, i32* %EltAddr
418//
419Instruction *ThenTerm =
420SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
421/*BranchWeights=*/nullptr, DTU);
422
423BasicBlock *CondBlock = ThenTerm->getParent();
424 CondBlock->setName("cond.store");
425
426 Builder.SetInsertPoint(CondBlock->getTerminator());
427Value *OneElt = Builder.CreateExtractElement(Src,Idx);
428Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr,Idx);
429 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
430
431// Create "else" block, fill it in the next iteration
432BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
433 NewIfBlock->setName("else");
434
435 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
436 }
437 CI->eraseFromParent();
438
439 ModifiedDT =true;
440}
441
442// Translate a masked gather intrinsic like
443// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
444// <16 x i1> %Mask, <16 x i32> %Src)
445// to a chain of basic blocks, with loading element one-by-one if
446// the appropriate mask bit is set
447//
448// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
449// %Mask0 = extractelement <16 x i1> %Mask, i32 0
450// br i1 %Mask0, label %cond.load, label %else
451//
452// cond.load:
453// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
454// %Load0 = load i32, i32* %Ptr0, align 4
455// %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0
456// br label %else
457//
458// else:
459// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0]
460// %Mask1 = extractelement <16 x i1> %Mask, i32 1
461// br i1 %Mask1, label %cond.load1, label %else2
462//
463// cond.load1:
464// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
465// %Load1 = load i32, i32* %Ptr1, align 4
466// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
467// br label %else2
468// . . .
469// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
470// ret <16 x i32> %Result
471 static void scalarizeMaskedGather(const DataLayout &DL,
472                                   bool HasBranchDivergence, CallInst *CI,
473                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
474Value *Ptrs = CI->getArgOperand(0);
475Value *Alignment = CI->getArgOperand(1);
476Value *Mask = CI->getArgOperand(2);
477Value *Src0 = CI->getArgOperand(3);
478
479auto *VecType = cast<FixedVectorType>(CI->getType());
480Type *EltTy = VecType->getElementType();
481
482IRBuilder<> Builder(CI->getContext());
483Instruction *InsertPt = CI;
484BasicBlock *IfBlock = CI->getParent();
485 Builder.SetInsertPoint(InsertPt);
486MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
487
488 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
489
490// The result vector
491Value *VResult = Src0;
492unsigned VectorWidth = VecType->getNumElements();
493
494// Shorten the way if the mask is a vector of constants.
495if (isConstantIntVector(Mask)) {
496for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
497if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
498continue;
499Value *Ptr = Builder.CreateExtractElement(Ptrs,Idx,"Ptr" +Twine(Idx));
500LoadInst *Load =
501 Builder.CreateAlignedLoad(EltTy,Ptr, AlignVal,"Load" +Twine(Idx));
502 VResult =
503 Builder.CreateInsertElement(VResult, Load,Idx,"Res" +Twine(Idx));
504 }
505 CI->replaceAllUsesWith(VResult);
506 CI->eraseFromParent();
507return;
508 }
509
510// If the mask is not v1i1, use scalar bit test operations. This generates
511// better results on X86 at least. However, don't do this on GPUs or other
512// machines with branch divergence, as there, each i1 takes up a register.
513Value *SclrMask =nullptr;
514if (VectorWidth != 1 && !HasBranchDivergence) {
515Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
516 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy,"scalar_mask");
517 }
518
519for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
520// Fill the "else" block, created in the previous iteration
521//
522// %Mask1 = and i16 %scalar_mask, i32 1 << Idx
523// %cond = icmp ne i16 %mask_1, 0
524// br i1 %Mask1, label %cond.load, label %else
525//
526// On GPUs, use
527 //  %cond = extractelement %mask, Idx
528// instead
529
530Value *Predicate;
531if (SclrMask !=nullptr) {
532Value *Mask = Builder.getInt(APInt::getOneBitSet(
533 VectorWidth,adjustForEndian(DL, VectorWidth,Idx)));
534Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
535 Builder.getIntN(VectorWidth, 0));
536 }else {
537Predicate = Builder.CreateExtractElement(Mask,Idx,"Mask" +Twine(Idx));
538 }
539
540// Create "cond" block
541//
542// %EltAddr = getelementptr i32* %1, i32 0
543// %Elt = load i32* %EltAddr
544// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
545//
546Instruction *ThenTerm =
547SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
548/*BranchWeights=*/nullptr, DTU);
549
550BasicBlock *CondBlock = ThenTerm->getParent();
551 CondBlock->setName("cond.load");
552
553 Builder.SetInsertPoint(CondBlock->getTerminator());
554Value *Ptr = Builder.CreateExtractElement(Ptrs,Idx,"Ptr" +Twine(Idx));
555LoadInst *Load =
556 Builder.CreateAlignedLoad(EltTy,Ptr, AlignVal,"Load" +Twine(Idx));
557Value *NewVResult =
558 Builder.CreateInsertElement(VResult, Load,Idx,"Res" +Twine(Idx));
559
560// Create "else" block, fill it in the next iteration
561BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
562 NewIfBlock->setName("else");
563BasicBlock *PrevIfBlock = IfBlock;
564 IfBlock = NewIfBlock;
565
566// Create the phi to join the new and previous value.
567 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
568PHINode *Phi = Builder.CreatePHI(VecType, 2,"res.phi.else");
569 Phi->addIncoming(NewVResult, CondBlock);
570 Phi->addIncoming(VResult, PrevIfBlock);
571 VResult = Phi;
572 }
573
574 CI->replaceAllUsesWith(VResult);
575 CI->eraseFromParent();
576
577 ModifiedDT =true;
578}
579
580// Translate a masked scatter intrinsic, like
581// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
582// <16 x i1> %Mask)
583// to a chain of basic blocks, that stores element one-by-one if
584// the appropriate mask bit is set.
585//
586// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
587// %Mask0 = extractelement <16 x i1> %Mask, i32 0
588// br i1 %Mask0, label %cond.store, label %else
589//
590// cond.store:
591// %Elt0 = extractelement <16 x i32> %Src, i32 0
592// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
593// store i32 %Elt0, i32* %Ptr0, align 4
594// br label %else
595//
596// else:
597// %Mask1 = extractelement <16 x i1> %Mask, i32 1
598// br i1 %Mask1, label %cond.store1, label %else2
599//
600// cond.store1:
601// %Elt1 = extractelement <16 x i32> %Src, i32 1
602// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
603// store i32 %Elt1, i32* %Ptr1, align 4
604// br label %else2
605// . . .
606 static void scalarizeMaskedScatter(const DataLayout &DL,
607                                    bool HasBranchDivergence, CallInst *CI,
608                                    DomTreeUpdater *DTU, bool &ModifiedDT) {
609Value *Src = CI->getArgOperand(0);
610Value *Ptrs = CI->getArgOperand(1);
611Value *Alignment = CI->getArgOperand(2);
612Value *Mask = CI->getArgOperand(3);
613
614auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
615
616assert(
617 isa<VectorType>(Ptrs->getType()) &&
618 isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
619"Vector of pointers is expected in masked scatter intrinsic");
620
621IRBuilder<> Builder(CI->getContext());
622Instruction *InsertPt = CI;
623 Builder.SetInsertPoint(InsertPt);
624 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
625
626MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
627unsigned VectorWidth = SrcFVTy->getNumElements();
628
629// Shorten the way if the mask is a vector of constants.
630if (isConstantIntVector(Mask)) {
631for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
632if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
633continue;
634Value *OneElt =
635 Builder.CreateExtractElement(Src,Idx,"Elt" +Twine(Idx));
636Value *Ptr = Builder.CreateExtractElement(Ptrs,Idx,"Ptr" +Twine(Idx));
637 Builder.CreateAlignedStore(OneElt,Ptr, AlignVal);
638 }
639 CI->eraseFromParent();
640return;
641 }
642
643// If the mask is not v1i1, use scalar bit test operations. This generates
644// better results on X86 at least.
645Value *SclrMask =nullptr;
646if (VectorWidth != 1 && !HasBranchDivergence) {
647Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
648 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy,"scalar_mask");
649 }
650
651for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
652// Fill the "else" block, created in the previous iteration
653//
654// %Mask1 = and i16 %scalar_mask, i32 1 << Idx
655// %cond = icmp ne i16 %mask_1, 0
656// br i1 %Mask1, label %cond.store, label %else
657//
658// On GPUs, use
659 //  %cond = extractelement %mask, Idx
660// instead
661Value *Predicate;
662if (SclrMask !=nullptr) {
663Value *Mask = Builder.getInt(APInt::getOneBitSet(
664 VectorWidth,adjustForEndian(DL, VectorWidth,Idx)));
665Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
666 Builder.getIntN(VectorWidth, 0));
667 }else {
668Predicate = Builder.CreateExtractElement(Mask,Idx,"Mask" +Twine(Idx));
669 }
670
671// Create "cond" block
672//
673// %Elt1 = extractelement <16 x i32> %Src, i32 1
674// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
675// %store i32 %Elt1, i32* %Ptr1
676//
677Instruction *ThenTerm =
678SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
679/*BranchWeights=*/nullptr, DTU);
680
681BasicBlock *CondBlock = ThenTerm->getParent();
682 CondBlock->setName("cond.store");
683
684 Builder.SetInsertPoint(CondBlock->getTerminator());
685Value *OneElt = Builder.CreateExtractElement(Src,Idx,"Elt" +Twine(Idx));
686Value *Ptr = Builder.CreateExtractElement(Ptrs,Idx,"Ptr" +Twine(Idx));
687 Builder.CreateAlignedStore(OneElt,Ptr, AlignVal);
688
689// Create "else" block, fill it in the next iteration
690BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
691 NewIfBlock->setName("else");
692
693 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
694 }
695 CI->eraseFromParent();
696
697 ModifiedDT =true;
698}
699
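// Translate a masked expandload intrinsic like
//  <16 x i32> @llvm.masked.expandload.v16i32(ptr %ptr, <16 x i1> %mask,
//                                            <16 x i32> %passthru)
// to a chain of basic blocks, in the same style as the loads above. Unlike a
// masked load, the data is read contiguously: each enabled mask bit consumes
// the next element at %ptr, so the pointer is only advanced (and joined with
// a ptr.phi.else PHI) in the taken branches.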
700 static void scalarizeMaskedExpandLoad(const DataLayout &DL,
701                                       bool HasBranchDivergence, CallInst *CI,
702                                       DomTreeUpdater *DTU, bool &ModifiedDT) {
703Value *Ptr = CI->getArgOperand(0);
704Value *Mask = CI->getArgOperand(1);
705Value *PassThru = CI->getArgOperand(2);
706Align Alignment = CI->getParamAlign(0).valueOrOne();
707
708auto *VecType = cast<FixedVectorType>(CI->getType());
709
710Type *EltTy = VecType->getElementType();
711
712IRBuilder<> Builder(CI->getContext());
713Instruction *InsertPt = CI;
714BasicBlock *IfBlock = CI->getParent();
715
716 Builder.SetInsertPoint(InsertPt);
717 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
718
719unsigned VectorWidth = VecType->getNumElements();
720
721// The result vector
722Value *VResult = PassThru;
723
724// Adjust alignment for the scalar instruction.
725constAlign AdjustedAlignment =
726commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
727
728// Shorten the way if the mask is a vector of constants.
729// Create a build_vector pattern, with loads/poisons as necessary and then
730// shuffle blend with the pass through value.
731if (isConstantIntVector(Mask)) {
732unsigned MemIndex = 0;
733 VResult =PoisonValue::get(VecType);
734SmallVector<int, 16> ShuffleMask(VectorWidth,PoisonMaskElem);
735for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
736Value *InsertElt;
737if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
738 InsertElt =PoisonValue::get(EltTy);
739 ShuffleMask[Idx] =Idx + VectorWidth;
740 }else {
741Value *NewPtr =
742 Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr, MemIndex);
743 InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
744"Load" +Twine(Idx));
745 ShuffleMask[Idx] =Idx;
746 ++MemIndex;
747 }
748 VResult = Builder.CreateInsertElement(VResult, InsertElt,Idx,
749"Res" +Twine(Idx));
750 }
751 VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
752 CI->replaceAllUsesWith(VResult);
753 CI->eraseFromParent();
754return;
755 }
756
757// If the mask is not v1i1, use scalar bit test operations. This generates
758// better results on X86 at least. However, don't do this on GPUs or other
759// machines with branch divergence, as there, each i1 takes up a register.
760Value *SclrMask =nullptr;
761if (VectorWidth != 1 && !HasBranchDivergence) {
762Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
763 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy,"scalar_mask");
764 }
765
766for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
767// Fill the "else" block, created in the previous iteration
768//
769     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
770     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
771     //  br i1 %mask_1, label %cond.load, label %else
772//
773// On GPUs, use
774 //  %cond = extractelement %mask, Idx
775// instead
776
777Value *Predicate;
778if (SclrMask !=nullptr) {
779Value *Mask = Builder.getInt(APInt::getOneBitSet(
780 VectorWidth,adjustForEndian(DL, VectorWidth,Idx)));
781Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
782 Builder.getIntN(VectorWidth, 0));
783 }else {
784Predicate = Builder.CreateExtractElement(Mask,Idx,"Mask" +Twine(Idx));
785 }
786
787// Create "cond" block
788//
789// %EltAddr = getelementptr i32* %1, i32 0
790// %Elt = load i32* %EltAddr
791// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
792//
793Instruction *ThenTerm =
794SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
795/*BranchWeights=*/nullptr, DTU);
796
797BasicBlock *CondBlock = ThenTerm->getParent();
798 CondBlock->setName("cond.load");
799
800 Builder.SetInsertPoint(CondBlock->getTerminator());
801LoadInst *Load = Builder.CreateAlignedLoad(EltTy,Ptr, AdjustedAlignment);
802Value *NewVResult = Builder.CreateInsertElement(VResult, Load,Idx);
803
804// Move the pointer if there are more blocks to come.
805Value *NewPtr;
806if ((Idx + 1) != VectorWidth)
807 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr, 1);
808
809// Create "else" block, fill it in the next iteration
810BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
811 NewIfBlock->setName("else");
812BasicBlock *PrevIfBlock = IfBlock;
813 IfBlock = NewIfBlock;
814
815// Create the phi to join the new and previous value.
816 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
817PHINode *ResultPhi = Builder.CreatePHI(VecType, 2,"res.phi.else");
818 ResultPhi->addIncoming(NewVResult, CondBlock);
819 ResultPhi->addIncoming(VResult, PrevIfBlock);
820 VResult = ResultPhi;
821
822// Add a PHI for the pointer if this isn't the last iteration.
823if ((Idx + 1) != VectorWidth) {
824PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2,"ptr.phi.else");
825 PtrPhi->addIncoming(NewPtr, CondBlock);
826 PtrPhi->addIncoming(Ptr, PrevIfBlock);
827Ptr = PtrPhi;
828 }
829 }
830
831 CI->replaceAllUsesWith(VResult);
832 CI->eraseFromParent();
833
834 ModifiedDT =true;
835}
836
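// Translate a masked compressstore intrinsic like
//  void @llvm.masked.compressstore.v16i32(<16 x i32> %val, ptr %ptr,
//                                         <16 x i1> %mask)
// to a chain of basic blocks, in the same style as the stores above. The
// enabled elements of %val are written contiguously starting at %ptr, so the
// pointer is only advanced in the conditional blocks, mirroring expandload.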
837 static void scalarizeMaskedCompressStore(const DataLayout &DL,
838                                          bool HasBranchDivergence, CallInst *CI,
839                                          DomTreeUpdater *DTU,
840                                          bool &ModifiedDT) {
841Value *Src = CI->getArgOperand(0);
842Value *Ptr = CI->getArgOperand(1);
843Value *Mask = CI->getArgOperand(2);
844Align Alignment = CI->getParamAlign(1).valueOrOne();
845
846auto *VecType = cast<FixedVectorType>(Src->getType());
847
848IRBuilder<> Builder(CI->getContext());
849Instruction *InsertPt = CI;
850BasicBlock *IfBlock = CI->getParent();
851
852 Builder.SetInsertPoint(InsertPt);
853 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
854
855Type *EltTy = VecType->getElementType();
856
857// Adjust alignment for the scalar instruction.
858constAlign AdjustedAlignment =
859commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
860
861unsigned VectorWidth = VecType->getNumElements();
862
863// Shorten the way if the mask is a vector of constants.
864if (isConstantIntVector(Mask)) {
865unsigned MemIndex = 0;
866for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
867if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
868continue;
869Value *OneElt =
870 Builder.CreateExtractElement(Src,Idx,"Elt" +Twine(Idx));
871Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr, MemIndex);
872 Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
873 ++MemIndex;
874 }
875 CI->eraseFromParent();
876return;
877 }
878
879// If the mask is not v1i1, use scalar bit test operations. This generates
880// better results on X86 at least. However, don't do this on GPUs or other
881// machines with branch divergence, as there, each i1 takes up a register.
882Value *SclrMask =nullptr;
883if (VectorWidth != 1 && !HasBranchDivergence) {
884Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
885 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy,"scalar_mask");
886 }
887
888for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
889// Fill the "else" block, created in the previous iteration
890//
891// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
892// br i1 %mask_1, label %cond.store, label %else
893//
894// On GPUs, use
895 //  %cond = extractelement %mask, Idx
896// instead
897Value *Predicate;
898if (SclrMask !=nullptr) {
899Value *Mask = Builder.getInt(APInt::getOneBitSet(
900 VectorWidth,adjustForEndian(DL, VectorWidth,Idx)));
901Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
902 Builder.getIntN(VectorWidth, 0));
903 }else {
904Predicate = Builder.CreateExtractElement(Mask,Idx,"Mask" +Twine(Idx));
905 }
906
907// Create "cond" block
908//
909// %OneElt = extractelement <16 x i32> %Src, i32 Idx
910// %EltAddr = getelementptr i32* %1, i32 0
911// %store i32 %OneElt, i32* %EltAddr
912//
913Instruction *ThenTerm =
914SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
915/*BranchWeights=*/nullptr, DTU);
916
917BasicBlock *CondBlock = ThenTerm->getParent();
918 CondBlock->setName("cond.store");
919
920 Builder.SetInsertPoint(CondBlock->getTerminator());
921Value *OneElt = Builder.CreateExtractElement(Src,Idx);
922 Builder.CreateAlignedStore(OneElt,Ptr, AdjustedAlignment);
923
924// Move the pointer if there are more blocks to come.
925Value *NewPtr;
926if ((Idx + 1) != VectorWidth)
927 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy,Ptr, 1);
928
929// Create "else" block, fill it in the next iteration
930BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
931 NewIfBlock->setName("else");
932BasicBlock *PrevIfBlock = IfBlock;
933 IfBlock = NewIfBlock;
934
935 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
936
937// Add a PHI for the pointer if this isn't the last iteration.
938if ((Idx + 1) != VectorWidth) {
939PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2,"ptr.phi.else");
940 PtrPhi->addIncoming(NewPtr, CondBlock);
941 PtrPhi->addIncoming(Ptr, PrevIfBlock);
942Ptr = PtrPhi;
943 }
944 }
945 CI->eraseFromParent();
946
947 ModifiedDT =true;
948}
949
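// Translate a histogram intrinsic like
//  void @llvm.experimental.vector.histogram.add(<8 x ptr> %ptrs, i32 %inc,
//                                               <8 x i1> %mask)
// to a chain of basic blocks: for every enabled lane, load the bucket through
// the corresponding pointer, add %inc, and store the result back.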
950 static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
951                                            DomTreeUpdater *DTU,
952                                            bool &ModifiedDT) {
953// If we extend histogram to return a result someday (like the updated vector)
954// then we'll need to support it here.
955assert(CI->getType()->isVoidTy() &&"Histogram with non-void return.");
956Value *Ptrs = CI->getArgOperand(0);
957Value *Inc = CI->getArgOperand(1);
958Value *Mask = CI->getArgOperand(2);
959
960auto *AddrType = cast<FixedVectorType>(Ptrs->getType());
961Type *EltTy = Inc->getType();
962
963IRBuilder<> Builder(CI->getContext());
964Instruction *InsertPt = CI;
965 Builder.SetInsertPoint(InsertPt);
966
967 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
968
969// FIXME: Do we need to add an alignment parameter to the intrinsic?
970unsigned VectorWidth = AddrType->getNumElements();
971
972// Shorten the way if the mask is a vector of constants.
973if (isConstantIntVector(Mask)) {
974for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
975if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
976continue;
977Value *Ptr = Builder.CreateExtractElement(Ptrs,Idx,"Ptr" +Twine(Idx));
978LoadInst *Load = Builder.CreateLoad(EltTy,Ptr,"Load" +Twine(Idx));
979Value *Add = Builder.CreateAdd(Load, Inc);
980 Builder.CreateStore(Add,Ptr);
981 }
982 CI->eraseFromParent();
983return;
984 }
985
986for (unsignedIdx = 0;Idx < VectorWidth; ++Idx) {
987Value *Predicate =
988 Builder.CreateExtractElement(Mask,Idx,"Mask" +Twine(Idx));
989
990Instruction *ThenTerm =
991SplitBlockAndInsertIfThen(Predicate, InsertPt,/*Unreachable=*/false,
992/*BranchWeights=*/nullptr, DTU);
993
994BasicBlock *CondBlock = ThenTerm->getParent();
995 CondBlock->setName("cond.histogram.update");
996
997 Builder.SetInsertPoint(CondBlock->getTerminator());
998Value *Ptr = Builder.CreateExtractElement(Ptrs,Idx,"Ptr" +Twine(Idx));
999LoadInst *Load = Builder.CreateLoad(EltTy,Ptr,"Load" +Twine(Idx));
1000Value *Add = Builder.CreateAdd(Load, Inc);
1001 Builder.CreateStore(Add,Ptr);
1002
1003// Create "else" block, fill it in the next iteration
1004BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
1005 NewIfBlock->setName("else");
1006 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
1007 }
1008
1009 CI->eraseFromParent();
1010 ModifiedDT =true;
1011}
1012
1013 static bool runImpl(Function &F, const TargetTransformInfo &TTI,
1014                     DominatorTree *DT) {
1015 std::optional<DomTreeUpdater> DTU;
1016if (DT)
1017 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
1018
1019bool EverMadeChange =false;
1020bool MadeChange =true;
1021auto &DL =F.getDataLayout();
1022bool HasBranchDivergence =TTI.hasBranchDivergence(&F);
1023while (MadeChange) {
1024 MadeChange =false;
1025for (BasicBlock &BB :llvm::make_early_inc_range(F)) {
1026bool ModifiedDTOnIteration =false;
1027 MadeChange |=optimizeBlock(BB, ModifiedDTOnIteration,TTI,DL,
1028 HasBranchDivergence, DTU ? &*DTU :nullptr);
1029
1030// Restart BB iteration if the dominator tree of the Function was changed
1031if (ModifiedDTOnIteration)
1032break;
1033 }
1034
1035 EverMadeChange |= MadeChange;
1036 }
1037return EverMadeChange;
1038}
1039
1040bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
1041auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1042DominatorTree *DT =nullptr;
1043if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
1044 DT = &DTWP->getDomTree();
1045returnrunImpl(F,TTI, DT);
1046}
1047
1048PreservedAnalyses
1049ScalarizeMaskedMemIntrinPass::run(Function &F,FunctionAnalysisManager &AM) {
1050auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1051auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
1052if (!runImpl(F,TTI, DT))
1053returnPreservedAnalyses::all();
1054PreservedAnalyses PA;
1055 PA.preserve<TargetIRAnalysis>();
1056 PA.preserve<DominatorTreeAnalysis>();
1057return PA;
1058}
1059
1060 static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
1061                           const TargetTransformInfo &TTI, const DataLayout &DL,
1062                           bool HasBranchDivergence, DomTreeUpdater *DTU) {
1063bool MadeChange =false;
1064
1065BasicBlock::iterator CurInstIterator = BB.begin();
1066while (CurInstIterator != BB.end()) {
1067if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
1068 MadeChange |=
1069optimizeCallInst(CI, ModifiedDT,TTI,DL, HasBranchDivergence, DTU);
1070if (ModifiedDT)
1071returntrue;
1072 }
1073
1074return MadeChange;
1075}
1076
1077 static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
1078                              const TargetTransformInfo &TTI,
1079                              const DataLayout &DL, bool HasBranchDivergence,
1080                              DomTreeUpdater *DTU) {
1081IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1082if (II) {
1083// The scalarization code below does not work for scalable vectors.
1084if (isa<ScalableVectorType>(II->getType()) ||
1085any_of(II->args(),
1086 [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
1087returnfalse;
1088switch (II->getIntrinsicID()) {
1089default:
1090break;
1091case Intrinsic::experimental_vector_histogram_add:
1092if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
1093 CI->getArgOperand(1)->getType()))
1094returnfalse;
1095scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
1096returntrue;
1097case Intrinsic::masked_load:
1098// Scalarize unsupported vector masked load
1099if (TTI.isLegalMaskedLoad(
1100 CI->getType(),
1101 cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
1102returnfalse;
1103scalarizeMaskedLoad(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
1104returntrue;
1105case Intrinsic::masked_store:
1106if (TTI.isLegalMaskedStore(
1107 CI->getArgOperand(0)->getType(),
1108 cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
1109returnfalse;
1110scalarizeMaskedStore(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
1111returntrue;
1112case Intrinsic::masked_gather: {
1113MaybeAlign MA =
1114 cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
1115Type *LoadTy = CI->getType();
1116Align Alignment =DL.getValueOrABITypeAlignment(MA,
1117 LoadTy->getScalarType());
1118if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
1119 !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
1120returnfalse;
1121scalarizeMaskedGather(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
1122returntrue;
1123 }
1124case Intrinsic::masked_scatter: {
1125MaybeAlign MA =
1126 cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
1127Type *StoreTy = CI->getArgOperand(0)->getType();
1128Align Alignment =DL.getValueOrABITypeAlignment(MA,
1129 StoreTy->getScalarType());
1130if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
1131 !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
1132 Alignment))
1133returnfalse;
1134scalarizeMaskedScatter(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
1135returntrue;
1136 }
1137case Intrinsic::masked_expandload:
1138if (TTI.isLegalMaskedExpandLoad(
1139 CI->getType(),
1140 CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne()))
1141returnfalse;
1142scalarizeMaskedExpandLoad(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
1143returntrue;
1144case Intrinsic::masked_compressstore:
1145if (TTI.isLegalMaskedCompressStore(
1146 CI->getArgOperand(0)->getType(),
1147 CI->getAttributes().getParamAttrs(1).getAlignment().valueOrOne()))
1148returnfalse;
1149scalarizeMaskedCompressStore(DL, HasBranchDivergence, CI, DTU,
1150 ModifiedDT);
1151returntrue;
1152 }
1153 }
1154
1155returnfalse;
1156}