Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
AMDGPULowerKernelArguments.cpp
Go to the documentation of this file.
1//===-- AMDGPULowerKernelArguments.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass replaces accesses to kernel arguments with loads from
10/// offsets from the kernarg base pointer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "GCNSubtarget.h"
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/CodeGen/TargetPassConfig.h"
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/IntrinsicsAMDGPU.h"
21#include "llvm/IR/MDBuilder.h"
22#include "llvm/Target/TargetMachine.h"
23
24#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
25
26using namespacellvm;
27
28namespace{
29
30classPreloadKernelArgInfo {
31private:
32Function &F;
33constGCNSubtarget &ST;
34unsigned NumFreeUserSGPRs;
35
36enum HiddenArg :unsigned {
37 HIDDEN_BLOCK_COUNT_X,
38 HIDDEN_BLOCK_COUNT_Y,
39 HIDDEN_BLOCK_COUNT_Z,
40 HIDDEN_GROUP_SIZE_X,
41 HIDDEN_GROUP_SIZE_Y,
42 HIDDEN_GROUP_SIZE_Z,
43 HIDDEN_REMAINDER_X,
44 HIDDEN_REMAINDER_Y,
45 HIDDEN_REMAINDER_Z,
46 END_HIDDEN_ARGS
47 };
48
49// Stores information about a specific hidden argument.
50structHiddenArgInfo {
51// Offset in bytes from the location in the kernearg segment pointed to by
52// the implicitarg pointer.
53uint8_t Offset;
54// The size of the hidden argument in bytes.
55uint8_t Size;
56// The name of the hidden argument in the kernel signature.
57constchar *Name;
58 };
59
60staticconstexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {
61 {0, 4,"_hidden_block_count_x"}, {4, 4,"_hidden_block_count_y"},
62 {8, 4,"_hidden_block_count_z"}, {12, 2,"_hidden_group_size_x"},
63 {14, 2,"_hidden_group_size_y"}, {16, 2,"_hidden_group_size_z"},
64 {18, 2,"_hidden_remainder_x"}, {20, 2,"_hidden_remainder_y"},
65 {22, 2,"_hidden_remainder_z"}};
66
67static HiddenArg getHiddenArgFromOffset(unsignedOffset) {
68for (unsignedI = 0;I < END_HIDDEN_ARGS; ++I)
69if (HiddenArgs[I].Offset ==Offset)
70returnstatic_cast<HiddenArg>(I);
71
72return END_HIDDEN_ARGS;
73 }
74
75staticType *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
76if (HA < END_HIDDEN_ARGS)
77returnType::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);
78
79llvm_unreachable("Unexpected hidden argument.");
80 }
81
82staticconstchar *getHiddenArgName(HiddenArg HA) {
83if (HA < END_HIDDEN_ARGS) {
84return HiddenArgs[HA].Name;
85 }
86llvm_unreachable("Unexpected hidden argument.");
87 }
88
89// Clones the function after adding implicit arguments to the argument list
90// and returns the new updated function. Preloaded implicit arguments are
91// added up to and including the last one that will be preloaded, indicated by
92// LastPreloadIndex. Currently preloading is only performed on the totality of
93// sequential data from the kernarg segment including implicit (hidden)
94// arguments. This means that all arguments up to the last preloaded argument
95// will also be preloaded even if that data is unused.
96Function *cloneFunctionWithPreloadImplicitArgs(unsigned LastPreloadIndex) {
97FunctionType *FT =F.getFunctionType();
98LLVMContext &Ctx =F.getParent()->getContext();
99SmallVector<Type *, 16> FTypes(FT->param_begin(), FT->param_end());
100for (unsignedI = 0;I <= LastPreloadIndex; ++I)
101 FTypes.push_back(getHiddenArgType(Ctx, HiddenArg(I)));
102
103FunctionType *NFT =
104 FunctionType::get(FT->getReturnType(), FTypes, FT->isVarArg());
105Function *NF =
106Function::Create(NFT,F.getLinkage(),F.getAddressSpace(),F.getName());
107
108 NF->copyAttributesFrom(&F);
109 NF->copyMetadata(&F, 0);
110 NF->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);
111
112F.getParent()->getFunctionList().insert(F.getIterator(), NF);
113 NF->takeName(&F);
114 NF->splice(NF->begin(), &F);
115
116Function::arg_iterator NFArg = NF->arg_begin();
117for (Argument &Arg :F.args()) {
118 Arg.replaceAllUsesWith(&*NFArg);
119 NFArg->takeName(&Arg);
120 ++NFArg;
121 }
122
123AttrBuilder AB(Ctx);
124 AB.addAttribute(Attribute::InReg);
125 AB.addAttribute("amdgpu-hidden-argument");
126AttributeList AL = NF->getAttributes();
127for (unsignedI = 0;I <= LastPreloadIndex; ++I) {
128 AL = AL.addParamAttributes(Ctx, NFArg->getArgNo(), AB);
129 NFArg++->setName(getHiddenArgName(HiddenArg(I)));
130 }
131
132 NF->setAttributes(AL);
133F.replaceAllUsesWith(NF);
134F.setCallingConv(CallingConv::C);
135
136return NF;
137 }
138
139public:
140 PreloadKernelArgInfo(Function &F,constGCNSubtarget &ST) :F(F), ST(ST) {
141 setInitialFreeUserSGPRsCount();
142 }
143
144// Returns the maximum number of user SGPRs that we have available to preload
145// arguments.
146void setInitialFreeUserSGPRsCount() {
147GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
148 NumFreeUserSGPRs = UserSGPRInfo.getNumFreeUserSGPRs();
149 }
150
151bool tryAllocPreloadSGPRs(unsigned AllocSize,uint64_t ArgOffset,
152uint64_t LastExplicitArgOffset) {
153// Check if this argument may be loaded into the same register as the
154// previous argument.
155if (ArgOffset - LastExplicitArgOffset < 4 &&
156 !isAligned(Align(4), ArgOffset))
157returntrue;
158
159// Pad SGPRs for kernarg alignment.
160 ArgOffset =alignDown(ArgOffset, 4);
161unsigned Padding = ArgOffset - LastExplicitArgOffset;
162unsigned PaddingSGPRs =alignTo(Padding, 4) / 4;
163unsigned NumPreloadSGPRs =alignTo(AllocSize, 4) / 4;
164if (NumPreloadSGPRs + PaddingSGPRs > NumFreeUserSGPRs)
165returnfalse;
166
167 NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
168returntrue;
169 }
170
171// Try to allocate SGPRs to preload implicit kernel arguments.
172void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
173uint64_t LastExplicitArgOffset,
174IRBuilder<> &Builder) {
175Function *ImplicitArgPtr =Intrinsic::getDeclarationIfExists(
176F.getParent(), Intrinsic::amdgcn_implicitarg_ptr);
177if (!ImplicitArgPtr)
178return;
179
180constDataLayout &DL =F.getParent()->getDataLayout();
181// Pair is the load and the load offset.
182SmallVector<std::pair<LoadInst *, unsigned>, 4> ImplicitArgLoads;
183for (auto *U : ImplicitArgPtr->users()) {
184Instruction *CI = dyn_cast<Instruction>(U);
185if (!CI || CI->getParent()->getParent() != &F)
186continue;
187
188for (auto *U : CI->users()) {
189 int64_tOffset = 0;
190auto *Load = dyn_cast<LoadInst>(U);// Load from ImplicitArgPtr?
191if (!Load) {
192if (GetPointerBaseWithConstantOffset(U,Offset,DL) != CI)
193continue;
194
195 Load = dyn_cast<LoadInst>(*U->user_begin());// Load from GEP?
196 }
197
198if (!Load || !Load->isSimple())
199continue;
200
201// FIXME: Expand to handle 64-bit implicit args and large merged loads.
202LLVMContext &Ctx =F.getParent()->getContext();
203Type *LoadTy = Load->getType();
204 HiddenArg HA = getHiddenArgFromOffset(Offset);
205if (HA == END_HIDDEN_ARGS || LoadTy != getHiddenArgType(Ctx, HA))
206continue;
207
208 ImplicitArgLoads.push_back(std::make_pair(Load,Offset));
209 }
210 }
211
212if (ImplicitArgLoads.empty())
213return;
214
215// Allocate loads in order of offset. We need to be sure that the implicit
216// argument can actually be preloaded.
217 std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(),less_second());
218
219// If we fail to preload any implicit argument we know we don't have SGPRs
220// to preload any subsequent ones with larger offsets. Find the first
221// argument that we cannot preload.
222auto *PreloadEnd = std::find_if(
223 ImplicitArgLoads.begin(), ImplicitArgLoads.end(),
224 [&](const std::pair<LoadInst *, unsigned> &Load) {
225 unsigned LoadSize = DL.getTypeStoreSize(Load.first->getType());
226 unsigned LoadOffset = Load.second;
227 if (!tryAllocPreloadSGPRs(LoadSize,
228 LoadOffset + ImplicitArgsBaseOffset,
229 LastExplicitArgOffset))
230 return true;
231
232 LastExplicitArgOffset =
233 ImplicitArgsBaseOffset + LoadOffset + LoadSize;
234 return false;
235 });
236
237if (PreloadEnd == ImplicitArgLoads.begin())
238return;
239
240unsigned LastHiddenArgIndex = getHiddenArgFromOffset(PreloadEnd[-1].second);
241Function *NF = cloneFunctionWithPreloadImplicitArgs(LastHiddenArgIndex);
242assert(NF);
243for (constauto *I = ImplicitArgLoads.begin();I != PreloadEnd; ++I) {
244LoadInst *LoadInst =I->first;
245unsigned LoadOffset =I->second;
246unsigned HiddenArgIndex = getHiddenArgFromOffset(LoadOffset);
247unsigned Index = NF->arg_size() - LastHiddenArgIndex + HiddenArgIndex - 1;
248Argument *Arg = NF->getArg(Index);
249LoadInst->replaceAllUsesWith(Arg);
250 }
251 }
252};
253
254classAMDGPULowerKernelArguments :publicFunctionPass {
255public:
256staticcharID;
257
258 AMDGPULowerKernelArguments() :FunctionPass(ID) {}
259
260boolrunOnFunction(Function &F)override;
261
262voidgetAnalysisUsage(AnalysisUsage &AU) const override{
263 AU.addRequired<TargetPassConfig>();
264 AU.setPreservesAll();
265 }
266};
267
268}// end anonymous namespace
269
270// skip allocas
271staticBasicBlock::iteratorgetInsertPt(BasicBlock &BB) {
272BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
273for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
274AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
275
276// If this is a dynamic alloca, the value may depend on the loaded kernargs,
277// so loads will need to be inserted before it.
278if (!AI || !AI->isStaticAlloca())
279break;
280 }
281
282return InsPt;
283}
284
285staticboollowerKernelArguments(Function &F,constTargetMachine &TM) {
286CallingConv::IDCC =F.getCallingConv();
287if (CC !=CallingConv::AMDGPU_KERNEL ||F.arg_empty())
288returnfalse;
289
290constGCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
291LLVMContext &Ctx =F.getParent()->getContext();
292constDataLayout &DL =F.getDataLayout();
293BasicBlock &EntryBlock = *F.begin();
294IRBuilder<> Builder(&EntryBlock,getInsertPt(EntryBlock));
295
296constAlign KernArgBaseAlign(16);// FIXME: Increase if necessary
297constuint64_t BaseOffset = ST.getExplicitKernelArgOffset();
298
299Align MaxAlign;
300// FIXME: Alignment is broken with explicit arg offset.;
301constuint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
302if (TotalKernArgSize == 0)
303returnfalse;
304
305CallInst *KernArgSegment =
306 Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
307nullptr,F.getName() +".kernarg.segment");
308 KernArgSegment->addRetAttr(Attribute::NonNull);
309 KernArgSegment->addRetAttr(
310Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
311
312uint64_t ExplicitArgOffset = 0;
313// Preloaded kernel arguments must be sequential.
314bool InPreloadSequence =true;
315 PreloadKernelArgInfo PreloadInfo(F, ST);
316
317for (Argument &Arg :F.args()) {
318constbool IsByRef = Arg.hasByRefAttr();
319Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
320MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
321Align ABITypeAlign =DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
322
323uint64_tSize =DL.getTypeSizeInBits(ArgTy);
324uint64_t AllocSize =DL.getTypeAllocSize(ArgTy);
325
326uint64_t EltOffset =alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
327uint64_t LastExplicitArgOffset = ExplicitArgOffset;
328 ExplicitArgOffset =alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
329
330// Guard against the situation where hidden arguments have already been
331// lowered and added to the kernel function signiture, i.e. in a situation
332// where this pass has run twice.
333if (Arg.hasAttribute("amdgpu-hidden-argument"))
334break;
335
336// Try to preload this argument into user SGPRs.
337if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
338 !Arg.getType()->isAggregateType())
339if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
340 LastExplicitArgOffset))
341continue;
342
343 InPreloadSequence =false;
344
345if (Arg.use_empty())
346continue;
347
348// If this is byval, the loads are already explicit in the function. We just
349// need to rewrite the pointer values.
350if (IsByRef) {
351Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(
352 Builder.getInt8Ty(), KernArgSegment, EltOffset,
353 Arg.getName() +".byval.kernarg.offset");
354
355Value *CastOffsetPtr =
356 Builder.CreateAddrSpaceCast(ArgOffsetPtr, Arg.getType());
357 Arg.replaceAllUsesWith(CastOffsetPtr);
358continue;
359 }
360
361if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
362// FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
363// modes on SI to know the high bits are 0 so pointer adds don't wrap. We
364// can't represent this with range metadata because it's only allowed for
365// integer types.
366if ((PT->getAddressSpace() ==AMDGPUAS::LOCAL_ADDRESS ||
367 PT->getAddressSpace() ==AMDGPUAS::REGION_ADDRESS) &&
368 !ST.hasUsableDSOffset())
369continue;
370
371// FIXME: We can replace this with equivalent alias.scope/noalias
372// metadata, but this appears to be a lot of work.
373if (Arg.hasNoAliasAttr())
374continue;
375 }
376
377auto *VT = dyn_cast<FixedVectorType>(ArgTy);
378bool IsV3 = VT && VT->getNumElements() == 3;
379bool DoShiftOpt =Size < 32 && !ArgTy->isAggregateType();
380
381VectorType *V4Ty =nullptr;
382
383 int64_t AlignDownOffset =alignDown(EltOffset, 4);
384 int64_t OffsetDiff = EltOffset - AlignDownOffset;
385Align AdjustedAlign =commonAlignment(
386 KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);
387
388Value *ArgPtr;
389Type *AdjustedArgTy;
390if (DoShiftOpt) {// FIXME: Handle aggregate types
391// Since we don't have sub-dword scalar loads, avoid doing an extload by
392// loading earlier than the argument address, and extracting the relevant
393// bits.
394// TODO: Update this for GFX12 which does have scalar sub-dword loads.
395//
396// Additionally widen any sub-dword load to i32 even if suitably aligned,
397// so that CSE between different argument loads works easily.
398 ArgPtr = Builder.CreateConstInBoundsGEP1_64(
399 Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
400 Arg.getName() +".kernarg.offset.align.down");
401 AdjustedArgTy = Builder.getInt32Ty();
402 }else {
403 ArgPtr = Builder.CreateConstInBoundsGEP1_64(
404 Builder.getInt8Ty(), KernArgSegment, EltOffset,
405 Arg.getName() +".kernarg.offset");
406 AdjustedArgTy = ArgTy;
407 }
408
409if (IsV3 &&Size >= 32) {
410 V4Ty =FixedVectorType::get(VT->getElementType(), 4);
411// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
412 AdjustedArgTy = V4Ty;
413 }
414
415LoadInst *Load =
416 Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
417 Load->setMetadata(LLVMContext::MD_invariant_load,MDNode::get(Ctx, {}));
418
419MDBuilder MDB(Ctx);
420
421if (Arg.hasAttribute(Attribute::NoUndef))
422 Load->setMetadata(LLVMContext::MD_noundef,MDNode::get(Ctx, {}));
423
424if (Arg.hasAttribute(Attribute::Range)) {
425constConstantRange &Range =
426 Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
427 Load->setMetadata(LLVMContext::MD_range,
428 MDB.createRange(Range.getLower(),Range.getUpper()));
429 }
430
431if (isa<PointerType>(ArgTy)) {
432if (Arg.hasNonNullAttr())
433 Load->setMetadata(LLVMContext::MD_nonnull,MDNode::get(Ctx, {}));
434
435uint64_t DerefBytes = Arg.getDereferenceableBytes();
436if (DerefBytes != 0) {
437 Load->setMetadata(
438 LLVMContext::MD_dereferenceable,
439MDNode::get(Ctx,
440 MDB.createConstant(
441 ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));
442 }
443
444uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
445if (DerefOrNullBytes != 0) {
446 Load->setMetadata(
447 LLVMContext::MD_dereferenceable_or_null,
448MDNode::get(Ctx,
449 MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
450 DerefOrNullBytes))));
451 }
452
453if (MaybeAlign ParamAlign = Arg.getParamAlign()) {
454 Load->setMetadata(
455 LLVMContext::MD_align,
456MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
457 Builder.getInt64Ty(), ParamAlign->value()))));
458 }
459 }
460
461// TODO: Convert noalias arg to !noalias
462
463if (DoShiftOpt) {
464Value *ExtractBits = OffsetDiff == 0 ?
465 Load : Builder.CreateLShr(Load, OffsetDiff * 8);
466
467IntegerType *ArgIntTy = Builder.getIntNTy(Size);
468Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
469Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
470 Arg.getName() +".load");
471 Arg.replaceAllUsesWith(NewVal);
472 }elseif (IsV3) {
473Value *Shuf = Builder.CreateShuffleVector(Load,ArrayRef<int>{0, 1, 2},
474 Arg.getName() +".load");
475 Arg.replaceAllUsesWith(Shuf);
476 }else {
477 Load->setName(Arg.getName() +".load");
478 Arg.replaceAllUsesWith(Load);
479 }
480 }
481
482 KernArgSegment->addRetAttr(
483Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
484
485if (InPreloadSequence) {
486uint64_t ImplicitArgsBaseOffset =
487alignTo(ExplicitArgOffset, ST.getAlignmentForImplicitArgPtr()) +
488 BaseOffset;
489 PreloadInfo.tryAllocImplicitArgPreloadSGPRs(ImplicitArgsBaseOffset,
490 ExplicitArgOffset, Builder);
491 }
492
493returntrue;
494}
495
496bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
497auto &TPC = getAnalysis<TargetPassConfig>();
498constTargetMachine &TM = TPC.getTM<TargetMachine>();
499returnlowerKernelArguments(F, TM);
500}
501
502INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments,DEBUG_TYPE,
503"AMDGPU Lower Kernel Arguments",false,false)
504INITIALIZE_PASS_END(AMDGPULowerKernelArguments,DEBUG_TYPE, "AMDGPULower KernelArguments",
505false,false)
506
507char AMDGPULowerKernelArguments::ID = 0;
508
509FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
510returnnew AMDGPULowerKernelArguments();
511}
512
513PreservedAnalyses
514AMDGPULowerKernelArgumentsPass::run(Function &F,FunctionAnalysisManager &AM) {
515bool Changed =lowerKernelArguments(F, TM);
516if (Changed) {
517// TODO: Preserves a lot more.
518PreservedAnalyses PA;
519 PA.preserveSet<CFGAnalyses>();
520return PA;
521 }
522
523returnPreservedAnalyses::all();
524}
Arguments
AMDGPU Lower Kernel Arguments
Definition:AMDGPULowerKernelArguments.cpp:504
getInsertPt
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
Definition:AMDGPULowerKernelArguments.cpp:271
lowerKernelArguments
static bool lowerKernelArguments(Function &F, const TargetMachine &TM)
Definition:AMDGPULowerKernelArguments.cpp:285
DEBUG_TYPE
#define DEBUG_TYPE
Definition:AMDGPULowerKernelArguments.cpp:24
AMDGPU.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...
Size
uint64_t Size
Definition:ELFObjHandler.cpp:81
GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.
IRBuilder.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
MDBuilder.h
Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:57
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:52
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.
ValueTracking.h
FunctionType
Definition:ItaniumDemangle.h:823
PointerType
Definition:ItaniumDemangle.h:627
VectorType
Definition:ItaniumDemangle.h:1173
llvm::AMDGPULowerKernelArgumentsPass::run
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
Definition:AMDGPULowerKernelArguments.cpp:514
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition:Instructions.h:63
llvm::AllocaInst::isStaticAlloca
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Definition:Instructions.cpp:1234
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition:PassManager.h:253
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition:PassAnalysisSupport.h:47
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition:PassAnalysisSupport.h:75
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition:PassAnalysisSupport.h:130
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition:Argument.h:31
llvm::Argument::getArgNo
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition:Argument.h:49
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::AttrBuilder
Definition:Attributes.h:1064
llvm::AttributeList
Definition:Attributes.h:490
llvm::Attribute::getWithDereferenceableBytes
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
Definition:Attributes.cpp:244
llvm::Attribute::getWithAlignment
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition:Attributes.cpp:234
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::end
iterator end()
Definition:BasicBlock.h:474
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition:BasicBlock.cpp:437
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition:BasicBlock.h:177
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition:Analysis.h:72
llvm::CallBase::addRetAttr
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Definition:InstrTypes.h:1484
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition:Instructions.h:1479
llvm::ConstantRange
This class represents a range of values.
Definition:ConstantRange.h:47
llvm::ConstantRange::getLower
const APInt & getLower() const
Return the lower value for this range.
Definition:ConstantRange.h:203
llvm::ConstantRange::getUpper
const APInt & getUpper() const
Return the upper value for this range.
Definition:ConstantRange.h:206
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition:DataLayout.h:63
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition:Type.cpp:791
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition:Pass.h:310
llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
llvm::Function
Definition:Function.h:63
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition:Function.h:173
llvm::Function::splice
void splice(Function::iterator ToIt, Function *FromF)
Transfer all blocks from FromF to this function at ToIt.
Definition:Function.h:761
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition:Function.h:353
llvm::Function::begin
iterator begin()
Definition:Function.h:853
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition:Function.h:868
llvm::Function::setAttributes
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
Definition:Function.h:356
llvm::Function::arg_size
size_t arg_size() const
Definition:Function.h:901
llvm::Function::setIsNewDbgInfoFormat
void setIsNewDbgInfoFormat(bool NewVal)
Definition:Function.cpp:105
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition:Function.h:886
llvm::Function::copyAttributesFrom
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition:Function.cpp:860
llvm::GCNSubtarget
Definition:GCNSubtarget.h:34
llvm::GCNUserSGPRUsageInfo
Definition:GCNSubtarget.h:1660
llvm::GCNUserSGPRUsageInfo::getNumFreeUserSGPRs
unsigned getNumFreeUserSGPRs()
Definition:GCNSubtarget.cpp:674
llvm::GlobalObject::copyMetadata
void copyMetadata(const GlobalObject *Src, unsigned Offset)
Copy metadata from Src, adjusting offsets by Offset.
Definition:Metadata.cpp:1799
llvm::IRBuilderBase::getIntNTy
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition:IRBuilder.h:558
llvm::IRBuilderBase::CreateAlignedLoad
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition:IRBuilder.h:1815
llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition:IRBuilder.h:1480
llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition:IRBuilder.h:545
llvm::IRBuilderBase::getInt64Ty
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition:IRBuilder.h:550
llvm::IRBuilderBase::CreateIntrinsic
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition:IRBuilder.cpp:900
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition:IRBuilder.h:2152
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition:IRBuilder.h:2533
llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition:IRBuilder.h:2019
llvm::IRBuilderBase::CreateConstInBoundsGEP1_64
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition:IRBuilder.h:1944
llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition:IRBuilder.h:535
llvm::IRBuilderBase::CreateAddrSpaceCast
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition:IRBuilder.h:2157
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition:IRBuilder.h:2705
llvm::Instruction
Definition:Instruction.h:68
llvm::IntegerType
Class to represent integer types.
Definition:DerivedTypes.h:42
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition:LLVMContext.h:67
llvm::LoadInst
An instruction for reading from memory.
Definition:Instructions.h:176
llvm::MDBuilder
Definition:MDBuilder.h:36
llvm::MDBuilder::createConstant
ConstantAsMetadata * createConstant(Constant *C)
Return the given constant as metadata.
Definition:MDBuilder.cpp:24
llvm::MDBuilder::createRange
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition:MDBuilder.cpp:95
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition:Metadata.h:1549
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition:Pass.cpp:98
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition:Analysis.h:111
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition:Analysis.h:117
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition:Analysis.h:146
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVectorTemplateCommon::end
iterator end()
Definition:SmallVector.h:269
llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition:SmallVector.h:267
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition:TargetMachine.h:77
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition:TargetPassConfig.h:85
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition:Type.h:303
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition:Value.cpp:377
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition:Value.cpp:534
llvm::Value::users
iterator_range< user_iterator > users()
Definition:Value.h:421
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition:Value.cpp:309
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition:Value.cpp:383
llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition:ilist_node.h:32
uint64_t
uint8_t
unsigned
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
TargetMachine.h
false
Definition:StackSlotColoring.cpp:193
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition:AMDGPUAddrSpace.h:32
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition:AMDGPUAddrSpace.h:35
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition:CallingConv.h:200
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition:CallingConv.h:24
llvm::Intrinsic::getDeclarationIfExists
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition:Intrinsics.cpp:747
llvm::SystemZISD::TM
@ TM
Definition:SystemZISelLowering.h:66
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::Offset
@ Offset
Definition:DWP.cpp:480
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition:Alignment.h:145
llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition:ValueTracking.h:639
llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition:MathExtras.h:556
llvm::HexPrintStyle::Lower
@ Lower
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition:AMDGPULowerKernelArguments.cpp:509
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition:Alignment.h:155
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition:Alignment.h:212
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition:Alignment.h:117
llvm::less_second
Function object to check whether the second component of a container supported by std::get (like std:...
Definition:STLExtras.h:1476

Generated on Fri Jul 18 2025 13:10:20 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp