1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 10/// Implements the AMDGPU specific subclass of TargetSubtarget. 12//===----------------------------------------------------------------------===// 26#include "llvm/IR/IntrinsicsAMDGPU.h" 27#include "llvm/IR/IntrinsicsR600.h" 33#define DEBUG_TYPE "amdgpu-subtarget" 41// Returns the maximum per-workgroup LDS allocation size (in bytes) that still 42// allows the given function to achieve an occupancy of NWaves waves per 43// SIMD / EU, taking into account only the function's *maximum* workgroup size. 49constunsigned WavesPerWorkgroup =
50 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
52constunsigned WorkGroupsPerCU =
53 std::max(1u, (NWaves *
getEUsPerCU()) / WavesPerWorkgroup);
58std::pair<unsigned, unsigned>
61// FIXME: We should take into account the LDS allocation granularity. 64// Queried LDS size may be larger than available on a CU, in which case we 65// consider the only achievable occupancy to be 1, in line with what we 66// consider the occupancy to be when the number of requested registers in a 67// particular bank is higher than the number of available ones in that bank. 73auto PropsFromWGSize = [=](
unsigned WGSize)
74 -> std::tuple<const unsigned, const unsigned, unsigned> {
75unsigned WavesPerWG =
divideCeil(WGSize, WaveSize);
77return {WavesPerWG, WGsPerCU, WavesPerWG * WGsPerCU};
80// The maximum group size will generally yield the minimum number of 81// workgroups, maximum number of waves, and minimum occupancy. The opposite is 82// generally true for the minimum group size. LDS or barrier resource 83// limitations can flip those minimums/maximums. 85auto [MinWavesPerWG, MaxWGsPerCU, MaxWavesPerCU] = PropsFromWGSize(MinWGSize);
86auto [MaxWavesPerWG, MinWGsPerCU, MinWavesPerCU] = PropsFromWGSize(MaxWGSize);
88// It is possible that we end up with flipped minimum and maximum number of 89// waves per CU when the number of minimum/maximum concurrent groups on the CU 90// is limited by LDS usage or barrier resources. 91if (MinWavesPerCU >= MaxWavesPerCU) {
94constunsigned WaveSlotsPerCU = WavesPerEU *
getEUsPerCU();
96// Look for a potential smaller group size than the maximum which decreases 97// the concurrent number of waves on the CU for the same number of 98// concurrent workgroups on the CU. 99unsigned MinWavesPerCUForWGSize =
100divideCeil(WaveSlotsPerCU, MinWGsPerCU + 1) * MinWGsPerCU;
101if (MinWavesPerCU > MinWavesPerCUForWGSize) {
102unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
103if (
unsigned ExcessSlotsPerWG = ExcessSlots / MinWGsPerCU) {
104// There may exist a smaller group size than the maximum that achieves 105// the minimum number of waves per CU. This group size is the largest 106// possible size that requires MaxWavesPerWG - E waves where E is 107// maximized under the following constraints. 108// 1. 0 <= E <= ExcessSlotsPerWG 109// 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize 110 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
111 MaxWavesPerWG - MinWavesPerWG);
115// Look for a potential larger group size than the minimum which increases 116// the concurrent number of waves on the CU for the same number of 117// concurrent workgroups on the CU. 118unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
119if (
unsigned LeftoverSlotsPerWG = LeftoverSlots / MaxWGsPerCU) {
120// There may exist a larger group size than the minimum that achieves the 121// maximum number of waves per CU. This group size is the smallest 122// possible size that requires MinWavesPerWG + L waves where L is 123// maximized under the following constraints. 124// 1. 0 <= L <= LeftoverSlotsPerWG 125// 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize 126 MaxWavesPerCU += MaxWGsPerCU * std::min(LeftoverSlotsPerWG,
127 ((MaxWGSize - 1) / WaveSize) + 1 -
132// Return the minimum/maximum number of waves on any EU, assuming that all 133// wavefronts are spread across all EUs as evenly as possible. 134return {std::clamp(MinWavesPerCU /
getEUsPerCU(), 1U, WavesPerEU),
144std::pair<unsigned, unsigned>
161// Default minimum/maximum flat work group sizes. 162 std::pair<unsigned, unsigned>
Default =
165// Requested minimum/maximum flat work group sizes. 167F,
"amdgpu-flat-work-group-size",
Default);
169// Make sure requested minimum is less than requested maximum. 170if (Requested.first > Requested.second)
173// Make sure requested values do not violate subtarget's specifications. 183 std::pair<unsigned, unsigned> Requested,
184 std::pair<unsigned, unsigned> FlatWorkGroupSizes)
const{
185// Default minimum/maximum number of waves per execution unit. 188// If minimum/maximum flat work group sizes were explicitly requested using 189// "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum 190// number of waves per execution unit to values implied by requested 191// minimum/maximum flat work group sizes. 192unsigned MinImpliedByFlatWorkGroupSize =
194Default.first = MinImpliedByFlatWorkGroupSize;
196// Make sure requested minimum is less than requested maximum. 197if (Requested.second && Requested.first > Requested.second)
200// Make sure requested values do not violate subtarget's specifications. 205// Make sure requested values are compatible with values implied by requested 206// minimum/maximum flat work group sizes. 207if (Requested.first < MinImpliedByFlatWorkGroupSize)
214constFunction &
F, std::pair<unsigned, unsigned> FlatWorkGroupSizes)
const{
215// Default minimum/maximum number of waves per execution unit. 218// Requested minimum/maximum number of waves per execution unit. 219 std::pair<unsigned, unsigned> Requested =
226if (
Node &&
Node->getNumOperands() == 3)
227return mdconst::extract<ConstantInt>(
Node->getOperand(Dim))->getZExtValue();
228return std::numeric_limits<unsigned>::max();
236unsigned Dimension)
const{
238if (ReqdSize != std::numeric_limits<unsigned>::max())
244for (
intI = 0;
I < 3; ++
I) {
253Function *Kernel =
I->getParent()->getParent();
258// If reqd_work_group_size is present it narrows value down. 259if (
auto *CI = dyn_cast<CallInst>(
I)) {
260constFunction *
F = CI->getCalledFunction();
262unsigned Dim = UINT_MAX;
263switch (
F->getIntrinsicID()) {
264case Intrinsic::amdgcn_workitem_id_x:
265case Intrinsic::r600_read_tidig_x:
268case Intrinsic::r600_read_local_size_x:
271case Intrinsic::amdgcn_workitem_id_y:
272case Intrinsic::r600_read_tidig_y:
275case Intrinsic::r600_read_local_size_y:
278case Intrinsic::amdgcn_workitem_id_z:
279case Intrinsic::r600_read_tidig_z:
282case Intrinsic::r600_read_local_size_z:
291if (ReqdSize != std::numeric_limits<unsigned>::max())
292 MinSize = MaxSize = ReqdSize;
300// Range metadata is [Lo, Hi). For ID query we need to pass max size 301// as Hi. For size query we need to pass Hi + 1. 309if (
auto *CI = dyn_cast<CallBase>(
I)) {
311 CI->addRangeRetAttr(
Range);
315I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
323// We don't allocate the segment if we know the implicit arguments weren't 324// used, even if the ABI implies we need them. 325if (
F.hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
331// Assume all implicit inputs are used by default 335returnF.getFnAttributeAsParsedInteger(
"amdgpu-implicitarg-num-bytes",
340Align &MaxAlign)
const{
349if (Arg.hasAttribute(
"amdgpu-hidden-argument"))
352constbool IsByRef = Arg.hasByRefAttr();
353Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
354Align Alignment =
DL.getValueOrABITypeAlignment(
355 IsByRef ? Arg.getParamAlign() : std::nullopt, ArgTy);
356uint64_t AllocSize =
DL.getTypeAllocSize(ArgTy);
357 ExplicitArgBytes =
alignTo(ExplicitArgBytes, Alignment) + AllocSize;
358 MaxAlign = std::max(MaxAlign, Alignment);
361return ExplicitArgBytes;
365Align &MaxAlign)
const{
374uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
376if (ImplicitBytes != 0) {
378 TotalSize =
alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
379 MaxAlign = std::max(MaxAlign, Alignment);
382// Being able to dereference past the end is useful for emitting scalar loads. 404// FIXME: This has no reason to be in subtarget 408 std::numeric_limits<uint32_t>::max());
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the InstructionSelector class for AMDGPU.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim)
Base class for AMDGPU specific classes of TargetSubtarget.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file describes how to lower LLVM inline asm to machine code INLINEASM.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
AMDGPU R600 specific subclass of TargetSubtarget.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool EnableRealTrue16Insts
Align getAlignmentForImplicitArgPtr() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
virtual unsigned getMinWavesPerEU() const =0
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* intrinsic call or load.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(Triple TT)
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned getMaxWavesPerEU() const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > WavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
Class for arbitrary precision integers.
This class represents an incoming formal argument to a Function.
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
Triple - Helper class for working with autoconf configuration names.
ArchType getArch() const
Get the parsed architecture type of this triple.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_READNONE bool isKernel(CallingConv::ID CC)
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isShader(CallingConv::ID cc)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
This is an optimization pass for GlobalISel generic memory operations.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
@ Default
The result values are uniform if and only if all operands are uniform.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.