Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
AMDGPUSubtarget.cpp
Go to the documentation of this file.
1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Implements the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUSubtarget.h"
15#include "AMDGPUCallLowering.h"
16#include "AMDGPUInstructionSelector.h"
17#include "AMDGPULegalizerInfo.h"
18#include "AMDGPURegisterBankInfo.h"
19#include "R600Subtarget.h"
20#include "SIMachineFunctionInfo.h"
21#include "Utils/AMDGPUBaseInfo.h"
22#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
23#include "llvm/CodeGen/MachineScheduler.h"
24#include "llvm/CodeGen/TargetFrameLowering.h"
25#include "llvm/IR/DiagnosticInfo.h"
26#include "llvm/IR/IntrinsicsAMDGPU.h"
27#include "llvm/IR/IntrinsicsR600.h"
28#include "llvm/IR/MDBuilder.h"
29#include <algorithm>
30
31using namespacellvm;
32
33#define DEBUG_TYPE "amdgpu-subtarget"
34
35AMDGPUSubtarget::AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
36
37boolAMDGPUSubtarget::useRealTrue16Insts() const{
38returnhasTrue16BitInsts() &&EnableRealTrue16Insts;
39}
40
41// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
42// allows the given function to achieve an occupancy of NWaves waves per
43// SIMD / EU, taking into account only the function's *maximum* workgroup size.
44unsigned
45AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
46constFunction &F) const{
47constunsigned WaveSize =getWavefrontSize();
48constunsigned WorkGroupSize =getFlatWorkGroupSizes(F).second;
49constunsigned WavesPerWorkgroup =
50 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
51
52constunsigned WorkGroupsPerCU =
53 std::max(1u, (NWaves *getEUsPerCU()) / WavesPerWorkgroup);
54
55returngetLocalMemorySize() / WorkGroupsPerCU;
56}
57
58std::pair<unsigned, unsigned>
59AMDGPUSubtarget::getOccupancyWithWorkGroupSizes(uint32_t LDSBytes,
60constFunction &F) const{
61// FIXME: We should take into account the LDS allocation granularity.
62constunsigned MaxWGsLDS =getLocalMemorySize() / std::max(LDSBytes, 1u);
63
64// Queried LDS size may be larger than available on a CU, in which case we
65// consider the only achievable occupancy to be 1, in line with what we
66// consider the occupancy to be when the number of requested registers in a
67// particular bank is higher than the number of available ones in that bank.
68if (!MaxWGsLDS)
69return {1, 1};
70
71constunsigned WaveSize =getWavefrontSize(), WavesPerEU =getMaxWavesPerEU();
72
73auto PropsFromWGSize = [=](unsigned WGSize)
74 -> std::tuple<const unsigned, const unsigned, unsigned> {
75unsigned WavesPerWG =divideCeil(WGSize, WaveSize);
76unsigned WGsPerCU = std::min(getMaxWorkGroupsPerCU(WGSize), MaxWGsLDS);
77return {WavesPerWG, WGsPerCU, WavesPerWG * WGsPerCU};
78 };
79
80// The maximum group size will generally yield the minimum number of
81// workgroups, maximum number of waves, and minimum occupancy. The opposite is
82// generally true for the minimum group size. LDS or barrier ressource
83// limitations can flip those minimums/maximums.
84constauto [MinWGSize, MaxWGSize] =getFlatWorkGroupSizes(F);
85auto [MinWavesPerWG, MaxWGsPerCU, MaxWavesPerCU] = PropsFromWGSize(MinWGSize);
86auto [MaxWavesPerWG, MinWGsPerCU, MinWavesPerCU] = PropsFromWGSize(MaxWGSize);
87
88// It is possible that we end up with flipped minimum and maximum number of
89// waves per CU when the number of minimum/maximum concurrent groups on the CU
90// is limited by LDS usage or barrier resources.
91if (MinWavesPerCU >= MaxWavesPerCU) {
92std::swap(MinWavesPerCU, MaxWavesPerCU);
93 }else {
94constunsigned WaveSlotsPerCU = WavesPerEU *getEUsPerCU();
95
96// Look for a potential smaller group size than the maximum which decreases
97// the concurrent number of waves on the CU for the same number of
98// concurrent workgroups on the CU.
99unsigned MinWavesPerCUForWGSize =
100divideCeil(WaveSlotsPerCU, MinWGsPerCU + 1) * MinWGsPerCU;
101if (MinWavesPerCU > MinWavesPerCUForWGSize) {
102unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
103if (unsigned ExcessSlotsPerWG = ExcessSlots / MinWGsPerCU) {
104// There may exist a smaller group size than the maximum that achieves
105// the minimum number of waves per CU. This group size is the largest
106// possible size that requires MaxWavesPerWG - E waves where E is
107// maximized under the following constraints.
108// 1. 0 <= E <= ExcessSlotsPerWG
109// 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
110 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
111 MaxWavesPerWG - MinWavesPerWG);
112 }
113 }
114
115// Look for a potential larger group size than the minimum which increases
116// the concurrent number of waves on the CU for the same number of
117// concurrent workgroups on the CU.
118unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
119if (unsigned LeftoverSlotsPerWG = LeftoverSlots / MaxWGsPerCU) {
120// There may exist a larger group size than the minimum that achieves the
121// maximum number of waves per CU. This group size is the smallest
122// possible size that requires MinWavesPerWG + L waves where L is
123// maximized under the following constraints.
124// 1. 0 <= L <= LeftoverSlotsPerWG
125// 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
126 MaxWavesPerCU += MaxWGsPerCU * std::min(LeftoverSlotsPerWG,
127 ((MaxWGSize - 1) / WaveSize) + 1 -
128 MinWavesPerWG);
129 }
130 }
131
132// Return the minimum/maximum number of waves on any EU, assuming that all
133// wavefronts are spread across all EUs as evenly as possible.
134return {std::clamp(MinWavesPerCU /getEUsPerCU(), 1U, WavesPerEU),
135 std::clamp(divideCeil(MaxWavesPerCU,getEUsPerCU()), 1U, WavesPerEU)};
136}
137
138std::pair<unsigned, unsigned>AMDGPUSubtarget::getOccupancyWithWorkGroupSizes(
139constMachineFunction &MF) const{
140constauto *MFI = MF.getInfo<SIMachineFunctionInfo>();
141returngetOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
142}
143
144std::pair<unsigned, unsigned>
145AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::IDCC) const{
146switch (CC) {
147caseCallingConv::AMDGPU_VS:
148caseCallingConv::AMDGPU_LS:
149caseCallingConv::AMDGPU_HS:
150caseCallingConv::AMDGPU_ES:
151caseCallingConv::AMDGPU_GS:
152caseCallingConv::AMDGPU_PS:
153return std::pair(1,getWavefrontSize());
154default:
155return std::pair(1u,getMaxFlatWorkGroupSize());
156 }
157}
158
159std::pair<unsigned, unsigned>AMDGPUSubtarget::getFlatWorkGroupSizes(
160constFunction &F) const{
161// Default minimum/maximum flat work group sizes.
162 std::pair<unsigned, unsigned>Default =
163getDefaultFlatWorkGroupSize(F.getCallingConv());
164
165// Requested minimum/maximum flat work group sizes.
166 std::pair<unsigned, unsigned> Requested =AMDGPU::getIntegerPairAttribute(
167F,"amdgpu-flat-work-group-size",Default);
168
169// Make sure requested minimum is less than requested maximum.
170if (Requested.first > Requested.second)
171returnDefault;
172
173// Make sure requested values do not violate subtarget's specifications.
174if (Requested.first <getMinFlatWorkGroupSize())
175returnDefault;
176if (Requested.second >getMaxFlatWorkGroupSize())
177returnDefault;
178
179return Requested;
180}
181
182std::pair<unsigned, unsigned>AMDGPUSubtarget::getEffectiveWavesPerEU(
183 std::pair<unsigned, unsigned> Requested,
184 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const{
185// Default minimum/maximum number of waves per execution unit.
186 std::pair<unsigned, unsigned>Default(1,getMaxWavesPerEU());
187
188// If minimum/maximum flat work group sizes were explicitly requested using
189// "amdgpu-flat-workgroup-size" attribute, then set default minimum/maximum
190// number of waves per execution unit to values implied by requested
191// minimum/maximum flat work group sizes.
192unsigned MinImpliedByFlatWorkGroupSize =
193getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
194Default.first = MinImpliedByFlatWorkGroupSize;
195
196// Make sure requested minimum is less than requested maximum.
197if (Requested.second && Requested.first > Requested.second)
198returnDefault;
199
200// Make sure requested values do not violate subtarget's specifications.
201if (Requested.first <getMinWavesPerEU() ||
202 Requested.second >getMaxWavesPerEU())
203returnDefault;
204
205// Make sure requested values are compatible with values implied by requested
206// minimum/maximum flat work group sizes.
207if (Requested.first < MinImpliedByFlatWorkGroupSize)
208returnDefault;
209
210return Requested;
211}
212
213std::pair<unsigned, unsigned>AMDGPUSubtarget::getWavesPerEU(
214constFunction &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const{
215// Default minimum/maximum number of waves per execution unit.
216 std::pair<unsigned, unsigned>Default(1,getMaxWavesPerEU());
217
218// Requested minimum/maximum number of waves per execution unit.
219 std::pair<unsigned, unsigned> Requested =
220AMDGPU::getIntegerPairAttribute(F,"amdgpu-waves-per-eu",Default,true);
221returngetEffectiveWavesPerEU(Requested, FlatWorkGroupSizes);
222}
223
224staticunsignedgetReqdWorkGroupSize(constFunction &Kernel,unsigned Dim) {
225auto *Node = Kernel.getMetadata("reqd_work_group_size");
226if (Node &&Node->getNumOperands() == 3)
227return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
228return std::numeric_limits<unsigned>::max();
229}
230
231boolAMDGPUSubtarget::isMesaKernel(constFunction &F) const{
232returnisMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
233}
234
235unsignedAMDGPUSubtarget::getMaxWorkitemID(constFunction &Kernel,
236unsigned Dimension) const{
237unsigned ReqdSize =getReqdWorkGroupSize(Kernel, Dimension);
238if (ReqdSize != std::numeric_limits<unsigned>::max())
239return ReqdSize - 1;
240returngetFlatWorkGroupSizes(Kernel).second - 1;
241}
242
243boolAMDGPUSubtarget::isSingleLaneExecution(constFunction &Func) const{
244for (intI = 0;I < 3; ++I) {
245if (getMaxWorkitemID(Func,I) > 0)
246returnfalse;
247 }
248
249returntrue;
250}
251
252boolAMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const{
253Function *Kernel =I->getParent()->getParent();
254unsigned MinSize = 0;
255unsigned MaxSize =getFlatWorkGroupSizes(*Kernel).second;
256bool IdQuery =false;
257
258// If reqd_work_group_size is present it narrows value down.
259if (auto *CI = dyn_cast<CallInst>(I)) {
260constFunction *F = CI->getCalledFunction();
261if (F) {
262unsigned Dim = UINT_MAX;
263switch (F->getIntrinsicID()) {
264case Intrinsic::amdgcn_workitem_id_x:
265case Intrinsic::r600_read_tidig_x:
266 IdQuery =true;
267 [[fallthrough]];
268case Intrinsic::r600_read_local_size_x:
269 Dim = 0;
270break;
271case Intrinsic::amdgcn_workitem_id_y:
272case Intrinsic::r600_read_tidig_y:
273 IdQuery =true;
274 [[fallthrough]];
275case Intrinsic::r600_read_local_size_y:
276 Dim = 1;
277break;
278case Intrinsic::amdgcn_workitem_id_z:
279case Intrinsic::r600_read_tidig_z:
280 IdQuery =true;
281 [[fallthrough]];
282case Intrinsic::r600_read_local_size_z:
283 Dim = 2;
284break;
285default:
286break;
287 }
288
289if (Dim <= 3) {
290unsigned ReqdSize =getReqdWorkGroupSize(*Kernel, Dim);
291if (ReqdSize != std::numeric_limits<unsigned>::max())
292 MinSize = MaxSize = ReqdSize;
293 }
294 }
295 }
296
297if (!MaxSize)
298returnfalse;
299
300// Range metadata is [Lo, Hi). For ID query we need to pass max size
301// as Hi. For size query we need to pass Hi + 1.
302if (IdQuery)
303 MinSize = 0;
304else
305 ++MaxSize;
306
307APIntLower{32, MinSize};
308APIntUpper{32, MaxSize};
309if (auto *CI = dyn_cast<CallBase>(I)) {
310ConstantRangeRange(Lower,Upper);
311 CI->addRangeRetAttr(Range);
312 }else {
313MDBuilder MDB(I->getContext());
314MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower,Upper);
315I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
316 }
317returntrue;
318}
319
320unsignedAMDGPUSubtarget::getImplicitArgNumBytes(constFunction &F) const{
321assert(AMDGPU::isKernel(F.getCallingConv()));
322
323// We don't allocate the segment if we know the implicit arguments weren't
324// used, even if the ABI implies we need them.
325if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
326return 0;
327
328if (isMesaKernel(F))
329return 16;
330
331// Assume all implicit inputs are used by default
332constModule *M =F.getParent();
333unsigned NBytes =
334AMDGPU::getAMDHSACodeObjectVersion(*M) >=AMDGPU::AMDHSA_COV5 ? 256 : 56;
335returnF.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
336 NBytes);
337}
338
339uint64_tAMDGPUSubtarget::getExplicitKernArgSize(constFunction &F,
340Align &MaxAlign) const{
341assert(F.getCallingConv() ==CallingConv::AMDGPU_KERNEL ||
342F.getCallingConv() ==CallingConv::SPIR_KERNEL);
343
344constDataLayout &DL =F.getDataLayout();
345uint64_t ExplicitArgBytes = 0;
346 MaxAlign =Align(1);
347
348for (constArgument &Arg :F.args()) {
349if (Arg.hasAttribute("amdgpu-hidden-argument"))
350continue;
351
352constbool IsByRef = Arg.hasByRefAttr();
353Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
354Align Alignment =DL.getValueOrABITypeAlignment(
355 IsByRef ? Arg.getParamAlign() : std::nullopt, ArgTy);
356uint64_t AllocSize =DL.getTypeAllocSize(ArgTy);
357 ExplicitArgBytes =alignTo(ExplicitArgBytes, Alignment) + AllocSize;
358 MaxAlign = std::max(MaxAlign, Alignment);
359 }
360
361return ExplicitArgBytes;
362}
363
364unsignedAMDGPUSubtarget::getKernArgSegmentSize(constFunction &F,
365Align &MaxAlign) const{
366if (F.getCallingConv() !=CallingConv::AMDGPU_KERNEL &&
367F.getCallingConv() !=CallingConv::SPIR_KERNEL)
368return 0;
369
370uint64_t ExplicitArgBytes =getExplicitKernArgSize(F, MaxAlign);
371
372unsigned ExplicitOffset =getExplicitKernelArgOffset();
373
374uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
375unsigned ImplicitBytes =getImplicitArgNumBytes(F);
376if (ImplicitBytes != 0) {
377constAlign Alignment =getAlignmentForImplicitArgPtr();
378 TotalSize =alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
379 MaxAlign = std::max(MaxAlign, Alignment);
380 }
381
382// Being able to dereference past the end is useful for emitting scalar loads.
383returnalignTo(TotalSize, 4);
384}
385
386AMDGPUDwarfFlavourAMDGPUSubtarget::getAMDGPUDwarfFlavour() const{
387returngetWavefrontSize() == 32 ?AMDGPUDwarfFlavour::Wave32
388 :AMDGPUDwarfFlavour::Wave64;
389}
390
391constAMDGPUSubtarget &AMDGPUSubtarget::get(constMachineFunction &MF) {
392if (MF.getTarget().getTargetTriple().getArch() ==Triple::amdgcn)
393returnstatic_cast<constAMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
394returnstatic_cast<constAMDGPUSubtarget &>(MF.getSubtarget<R600Subtarget>());
395}
396
397constAMDGPUSubtarget &AMDGPUSubtarget::get(constTargetMachine &TM,constFunction &F) {
398if (TM.getTargetTriple().getArch() ==Triple::amdgcn)
399returnstatic_cast<constAMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
400returnstatic_cast<constAMDGPUSubtarget &>(
401 TM.getSubtarget<R600Subtarget>(F));
402}
403
404// FIXME: This has no reason to be in subtarget
405SmallVector<unsigned>
406AMDGPUSubtarget::getMaxNumWorkGroups(constFunction &F) const{
407returnAMDGPU::getIntegerVecAttribute(F,"amdgpu-max-num-workgroups", 3,
408 std::numeric_limits<uint32_t>::max());
409}
AMDGPUBaseInfo.h
AMDGPUCallLowering.h
This file describes how to lower LLVM calls to machine code calls.
AMDGPUInstructionSelector.h
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPULegalizerInfo.h
This file declares the targeting of the Machinelegalizer class for AMDGPU.
AMDGPURegisterBankInfo.h
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
getReqdWorkGroupSize
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim)
Definition:AMDGPUSubtarget.cpp:224
AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
DiagnosticInfo.h
InlineAsmLowering.h
This file describes how to lower LLVM inline asm to machine code INLINEASM.
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
MDBuilder.h
MachineScheduler.h
Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
if
if(PassOpts->AAPipeline)
Definition:PassBuilderBindings.cpp:64
R600Subtarget.h
AMDGPU R600 specific subclass of TargetSubtarget.
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SIMachineFunctionInfo.h
TargetFrameLowering.h
Node
Definition:ItaniumDemangle.h:163
llvm::AMDGPUSubtarget
Definition:AMDGPUSubtarget.h:29
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition:AMDGPUSubtarget.h:154
llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition:AMDGPUSubtarget.cpp:59
llvm::AMDGPUSubtarget::getDefaultFlatWorkGroupSize
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Definition:AMDGPUSubtarget.cpp:145
llvm::AMDGPUSubtarget::EnableRealTrue16Insts
bool EnableRealTrue16Insts
Definition:AMDGPUSubtarget.h:60
llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr
Align getAlignmentForImplicitArgPtr() const
Definition:AMDGPUSubtarget.h:288
llvm::AMDGPUSubtarget::getEUsPerCU
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
Definition:AMDGPUSubtarget.h:286
llvm::AMDGPUSubtarget::isMesaKernel
bool isMesaKernel(const Function &F) const
Definition:AMDGPUSubtarget.cpp:231
llvm::AMDGPUSubtarget::getWavesPerEU
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Definition:AMDGPUSubtarget.h:109
llvm::AMDGPUSubtarget::useRealTrue16Insts
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Definition:AMDGPUSubtarget.cpp:37
llvm::AMDGPUSubtarget::getMinWavesPerEU
virtual unsigned getMinWavesPerEU() const =0
llvm::AMDGPUSubtarget::getFlatWorkGroupSizes
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Definition:AMDGPUSubtarget.cpp:159
llvm::AMDGPUSubtarget::makeLIDRangeMetadata
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* intrinsic call or load.
Definition:AMDGPUSubtarget.cpp:252
llvm::AMDGPUSubtarget::getMaxWorkitemID
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition:AMDGPUSubtarget.cpp:235
llvm::AMDGPUSubtarget::getImplicitArgNumBytes
unsigned getImplicitArgNumBytes(const Function &F) const
Definition:AMDGPUSubtarget.cpp:320
llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
Definition:AMDGPUSubtarget.h:271
llvm::AMDGPUSubtarget::getMaxNumWorkGroups
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
Definition:AMDGPUSubtarget.cpp:406
llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition:AMDGPUSubtarget.cpp:364
llvm::AMDGPUSubtarget::hasTrue16BitInsts
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
Definition:AMDGPUSubtarget.h:177
llvm::AMDGPUSubtarget::getAMDGPUDwarfFlavour
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
Definition:AMDGPUSubtarget.cpp:386
llvm::AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition:AMDGPUSubtarget.cpp:45
llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize
virtual unsigned getMaxFlatWorkGroupSize() const =0
llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(Triple TT)
Definition:AMDGPUSubtarget.cpp:35
llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition:AMDGPUSubtarget.h:294
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition:AMDGPUSubtarget.h:331
llvm::AMDGPUSubtarget::getExplicitKernArgSize
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
Definition:AMDGPUSubtarget.cpp:339
llvm::AMDGPUSubtarget::getEffectiveWavesPerEU
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > WavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
Definition:AMDGPUSubtarget.cpp:182
llvm::AMDGPUSubtarget::isSingleLaneExecution
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
Definition:AMDGPUSubtarget.cpp:243
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition:AMDGPUSubtarget.cpp:391
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition:AMDGPUSubtarget.h:259
llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize
virtual unsigned getMinFlatWorkGroupSize() const =0
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition:Argument.h:31
llvm::ConstantRange
This class represents a range of values.
Definition:ConstantRange.h:47
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition:DataLayout.h:63
llvm::Function
Definition:Function.h:63
llvm::GCNSubtarget
Definition:GCNSubtarget.h:34
llvm::GlobalObject::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition:Value.h:565
llvm::Instruction
Definition:Instruction.h:68
llvm::MDBuilder
Definition:MDBuilder.h:36
llvm::MDBuilder::createRange
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition:MDBuilder.cpp:95
llvm::MDNode
Metadata node.
Definition:Metadata.h:1073
llvm::MachineFunction
Definition:MachineFunction.h:267
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition:MachineFunction.h:733
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition:MachineFunction.h:704
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition:MachineFunction.h:831
llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition:MachineFunction.h:729
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition:Module.h:65
llvm::R600Subtarget
Definition:R600Subtarget.h:29
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition:SIMachineFunctionInfo.h:390
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition:TargetMachine.h:77
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition:TargetMachine.h:126
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition:Triple.h:44
llvm::Triple::amdgcn
@ amdgcn
Definition:Triple.h:74
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition:Triple.h:395
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
uint32_t
uint64_t
unsigned
llvm::AMDGPU::AMDHSA_COV5
@ AMDHSA_COV5
Definition:AMDGPUBaseInfo.h:56
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.h:1301
llvm::AMDGPU::getAMDHSACodeObjectVersion
unsigned getAMDHSACodeObjectVersion(const Module &M)
Definition:AMDGPUBaseInfo.cpp:172
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition:AMDGPUBaseInfo.cpp:2041
llvm::AMDGPU::getIntegerVecAttribute
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
Definition:AMDGPUBaseInfo.cpp:1367
llvm::AMDGPU::getIntegerPairAttribute
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
Definition:AMDGPUBaseInfo.cpp:1332
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition:CallingConv.h:188
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition:CallingConv.h:200
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition:CallingConv.h:206
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition:CallingConv.h:191
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition:CallingConv.h:194
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition:CallingConv.h:144
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition:CallingConv.h:218
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition:CallingConv.h:213
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::HexPrintStyle::Upper
@ Upper
llvm::HexPrintStyle::Lower
@ Lower
llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition:MathExtras.h:404
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition:Alignment.h:155
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1873
llvm::InstructionUniformity::Default
@ Default
The result values are uniform if and only if all operands are uniform.
llvm::AMDGPUDwarfFlavour
AMDGPUDwarfFlavour
Definition:AMDGPUMCTargetDesc.h:32
llvm::Wave32
@ Wave32
Definition:AMDGPUMCTargetDesc.h:32
llvm::Wave64
@ Wave64
Definition:AMDGPUMCTargetDesc.h:32
std
Implement std::hash so that hash_code can be used in STL containers.
Definition:BitVector.h:858
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39

Generated on Sun Jul 20 2025 11:15:28 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp