//===-- AMDGPULowerKernelArguments.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass replaces accesses to kernel arguments with loads from
/// offsets from the kernarg base pointer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"

using namespace llvm;

namespace {

class PreloadKernelArgInfo {
private:
  Function &F;
  const GCNSubtarget &ST;
  unsigned NumFreeUserSGPRs;

  enum HiddenArg : unsigned {
    HIDDEN_BLOCK_COUNT_X,
    HIDDEN_BLOCK_COUNT_Y,
    HIDDEN_BLOCK_COUNT_Z,
    HIDDEN_GROUP_SIZE_X,
    HIDDEN_GROUP_SIZE_Y,
    HIDDEN_GROUP_SIZE_Z,
    HIDDEN_REMAINDER_X,
    HIDDEN_REMAINDER_Y,
    HIDDEN_REMAINDER_Z,
    END_HIDDEN_ARGS
  };
  // Stores information about a specific hidden argument.
  struct HiddenArgInfo {
    // Offset in bytes from the location in the kernarg segment pointed to by
    // the implicitarg pointer.
    uint8_t Offset;
    // The size of the hidden argument in bytes.
    uint8_t Size;
    // The name of the hidden argument in the kernel signature.
    const char *Name;
  };

  static constexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {
      {0, 4, "_hidden_block_count_x"}, {4, 4, "_hidden_block_count_y"},
      {8, 4, "_hidden_block_count_z"}, {12, 2, "_hidden_group_size_x"},
      {14, 2, "_hidden_group_size_y"}, {16, 2, "_hidden_group_size_z"},
      {18, 2, "_hidden_remainder_x"},  {20, 2, "_hidden_remainder_y"},
      {22, 2, "_hidden_remainder_z"}};
  static HiddenArg getHiddenArgFromOffset(unsigned Offset) {
    for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
      if (HiddenArgs[I].Offset == Offset)
        return static_cast<HiddenArg>(I);

    return END_HIDDEN_ARGS;
  }
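  // e.g., getHiddenArgFromOffset(8) yields HIDDEN_BLOCK_COUNT_Z, while an
  // offset that starts no hidden argument (e.g., 13) yields END_HIDDEN_ARGS.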
  static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
    if (HA < END_HIDDEN_ARGS)
      return Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);

    llvm_unreachable("Unexpected hidden argument.");
  }
  static const char *getHiddenArgName(HiddenArg HA) {
    if (HA < END_HIDDEN_ARGS) {
      return HiddenArgs[HA].Name;
    }
    llvm_unreachable("Unexpected hidden argument.");
  }
  // Clones the function after adding implicit arguments to the argument list
  // and returns the new updated function. Preloaded implicit arguments are
  // added up to and including the last one that will be preloaded, indicated
  // by LastPreloadIndex. Currently preloading is only performed on the
  // totality of sequential data from the kernarg segment including implicit
  // (hidden) arguments. This means that all arguments up to the last preloaded
  // argument will also be preloaded even if that data is unused.
  Function *cloneFunctionWithPreloadImplicitArgs(unsigned LastPreloadIndex) {
    FunctionType *FT = F.getFunctionType();
    LLVMContext &Ctx = F.getParent()->getContext();
    SmallVector<Type *, 16> FTypes(FT->param_begin(), FT->param_end());
    for (unsigned I = 0; I <= LastPreloadIndex; ++I)
      FTypes.push_back(getHiddenArgType(Ctx, HiddenArg(I)));

    FunctionType *NFT =
        FunctionType::get(FT->getReturnType(), FTypes, FT->isVarArg());
    Function *NF =
        Function::Create(NFT, F.getLinkage(), F.getAddressSpace(), F.getName());

    NF->copyAttributesFrom(&F);
    NF->copyMetadata(&F, 0);
    NF->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);

    F.getParent()->getFunctionList().insert(F.getIterator(), NF);
    NF->takeName(&F);
    NF->splice(NF->begin(), &F);

    Function::arg_iterator NFArg = NF->arg_begin();
    for (Argument &Arg : F.args()) {
      Arg.replaceAllUsesWith(&*NFArg);
      NFArg->takeName(&Arg);
      ++NFArg;
    }

    AttrBuilder AB(Ctx);
    AB.addAttribute(Attribute::InReg);
    AB.addAttribute("amdgpu-hidden-argument");
    AttributeList AL = NF->getAttributes();
    for (unsigned I = 0; I <= LastPreloadIndex; ++I) {
      AL = AL.addParamAttributes(Ctx, NFArg->getArgNo(), AB);
      NFArg++->setName(getHiddenArgName(HiddenArg(I)));
    }

    NF->setAttributes(AL);
    F.replaceAllUsesWith(NF);
    F.setCallingConv(CallingConv::C);

    return NF;
  }
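  // Illustrative sketch: cloning `amdgpu_kernel void @k(i32 %n)` with
  // LastPreloadIndex = HIDDEN_BLOCK_COUNT_Y would produce a clone of the form
  //   void @k(i32 %n, i32 inreg %_hidden_block_count_x,
  //           i32 inreg %_hidden_block_count_y)
  // with all uses of the original function rewired to the clone.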
public:
  PreloadKernelArgInfo(Function &F, const GCNSubtarget &ST) : F(F), ST(ST) {
    setInitialFreeUserSGPRsCount();
  }

  // Sets the maximum number of user SGPRs that we have available to preload
  // arguments.
  void setInitialFreeUserSGPRsCount() {
    GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
    NumFreeUserSGPRs = UserSGPRInfo.getNumFreeUserSGPRs();
  }
  bool tryAllocPreloadSGPRs(unsigned AllocSize, uint64_t ArgOffset,
                            uint64_t LastExplicitArgOffset) {
    // Check if this argument may be loaded into the same register as the
    // previous argument.
    if (ArgOffset - LastExplicitArgOffset < 4 &&
        !isAligned(Align(4), ArgOffset))
      return true;

    // Pad SGPRs for kernarg alignment.
    unsigned Padding = ArgOffset - LastExplicitArgOffset;
    unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
    unsigned NumPreloadSGPRs = alignTo(AllocSize, 4) / 4;
    if (NumPreloadSGPRs + PaddingSGPRs > NumFreeUserSGPRs)
      return false;

    NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
    return true;
  }
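  // Worked example: if the previous argument ended at byte 42 and an 8-byte
  // argument is placed at its aligned offset 48, Padding = 6, so
  // PaddingSGPRs = 2 and NumPreloadSGPRs = 2, consuming 4 free user SGPRs.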
  // Try to allocate SGPRs to preload implicit kernel arguments.
  void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
                                       uint64_t LastExplicitArgOffset,
                                       IRBuilder<> &Builder) {
    Function *ImplicitArgPtr = Intrinsic::getDeclarationIfExists(
        F.getParent(), Intrinsic::amdgcn_implicitarg_ptr);
    if (!ImplicitArgPtr)
      return;

    const DataLayout &DL = F.getParent()->getDataLayout();
    // Pair is the load and the load offset.
    SmallVector<std::pair<LoadInst *, unsigned>, 4> ImplicitArgLoads;
    for (auto *U : ImplicitArgPtr->users()) {
      Instruction *CI = dyn_cast<Instruction>(U);
      if (!CI || CI->getParent()->getParent() != &F)
        continue;

      for (auto *U : CI->users()) {
        int64_t Offset = 0;
        auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr?
        if (!Load) {
          if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)
            continue;

          Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?
        }

        if (!Load || !Load->isSimple())
          continue;

        // FIXME: Expand to handle 64-bit implicit args and large merged loads.
        LLVMContext &Ctx = F.getParent()->getContext();
        Type *LoadTy = Load->getType();
        HiddenArg HA = getHiddenArgFromOffset(Offset);
        if (HA == END_HIDDEN_ARGS || LoadTy != getHiddenArgType(Ctx, HA))
          continue;

        ImplicitArgLoads.push_back(std::make_pair(Load, Offset));
      }
    }

    if (ImplicitArgLoads.empty())
      return;
    // Allocate loads in order of offset. We need to be sure that the implicit
    // argument can actually be preloaded.
    std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(), less_second());

    // If we fail to preload any implicit argument we know we don't have SGPRs
    // to preload any subsequent ones with larger offsets. Find the first
    // argument that we cannot preload.
    auto *PreloadEnd = std::find_if(
        ImplicitArgLoads.begin(), ImplicitArgLoads.end(),
        [&](const std::pair<LoadInst *, unsigned> &Load) {
          unsigned LoadSize = DL.getTypeStoreSize(Load.first->getType());
          unsigned LoadOffset = Load.second;
          if (!tryAllocPreloadSGPRs(LoadSize,
                                    LoadOffset + ImplicitArgsBaseOffset,
                                    LastExplicitArgOffset))
            return true;

          LastExplicitArgOffset =
              ImplicitArgsBaseOffset + LoadOffset + LoadSize;
          return false;
        });

    if (PreloadEnd == ImplicitArgLoads.begin())
      return;
    unsigned LastHiddenArgIndex = getHiddenArgFromOffset(PreloadEnd[-1].second);
    Function *NF = cloneFunctionWithPreloadImplicitArgs(LastHiddenArgIndex);
    for (const auto *I = ImplicitArgLoads.begin(); I != PreloadEnd; ++I) {
      LoadInst *Load = I->first;
      unsigned LoadOffset = I->second;
      unsigned HiddenArgIndex = getHiddenArgFromOffset(LoadOffset);
      unsigned Index = NF->arg_size() - LastHiddenArgIndex + HiddenArgIndex - 1;
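      // Index math: the clone appends hidden args 0..LastHiddenArgIndex after
      // the original args, so e.g. with 2 explicit args and preloading through
      // HIDDEN_BLOCK_COUNT_Z (index 2), arg_size() is 5 and the load at
      // offset 4 (HIDDEN_BLOCK_COUNT_Y, index 1) maps to parameter 3.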
      Argument *Arg = NF->getArg(Index);
      Load->replaceAllUsesWith(Arg);
    }
  }
};

class AMDGPULowerKernelArguments : public FunctionPass {
public:
  static char ID;

  AMDGPULowerKernelArguments() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.setPreservesAll();
  }
};
} // end anonymous namespace

// skip allocas
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded
    // kernargs, so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getDataLayout();
  BasicBlock &EntryBlock = *F.begin();
  IRBuilder<> Builder(&EntryBlock, getInsertPt(EntryBlock));

  const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset();

  Align MaxAlign;
  // FIXME: Alignment is broken with explicit arg offset.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");
  KernArgSegment->addRetAttr(Attribute::NonNull);
  KernArgSegment->addRetAttr(
      Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
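  // Illustrative result (names assuming a kernel @foo): the segment pointer
  // is materialized once in the entry block as
  //   %foo.kernarg.segment = call ptr addrspace(4)
  //       @llvm.amdgcn.kernarg.segment.ptr()
  // and all argument accesses below become loads at constant offsets from it.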
  uint64_t ExplicitArgOffset = 0;
  // Preloaded kernel arguments must be sequential.
  bool InPreloadSequence = true;
  PreloadKernelArgInfo PreloadInfo(F, ST);

  for (Argument &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
    Align ABITypeAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

    uint64_t Size = DL.getTypeSizeInBits(ArgTy);
    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);

    uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
    uint64_t LastExplicitArgOffset = ExplicitArgOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
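    // Offset bookkeeping example: for (i8, i32) explicit args, the i8 lands
    // at EltOffset = BaseOffset + 0 and advances ExplicitArgOffset to 1; the
    // i32 then aligns up to 4, so EltOffset = BaseOffset + 4 and
    // ExplicitArgOffset becomes 8.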
    // Guard against the situation where hidden arguments have already been
    // lowered and added to the kernel function signature, i.e. in a situation
    // where this pass has run twice.
    if (Arg.hasAttribute("amdgpu-hidden-argument"))
      break;

    // Try to preload this argument into user SGPRs.
    if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
        !Arg.getType()->isAggregateType())
      if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
                                           LastExplicitArgOffset))
        continue;

    InPreloadSequence = false;

    if (Arg.use_empty())
      continue;
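    // Note: InPreloadSequence goes false on the first argument that cannot be
    // (or is not requested to be) preloaded; every later argument is then
    // lowered to an explicit kernarg load even if it is marked inreg.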
    // If this is byval, the loads are already explicit in the function. We
    // just need to rewrite the pointer values.
    if (IsByRef) {
      Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".byval.kernarg.offset");

      Value *CastOffsetPtr =
          Builder.CreateAddrSpaceCast(ArgOffsetPtr, Arg.getType());
      Arg.replaceAllUsesWith(CastOffsetPtr);
      continue;
    }
    if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
          !ST.hasUsableDSOffset())
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }
    auto *VT = dyn_cast<FixedVectorType>(ArgTy);
    bool IsV3 = VT && VT->getNumElements() == 3;
    bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    VectorType *V4Ty = nullptr;

    int64_t AlignDownOffset = alignDown(EltOffset, 4);
    int64_t OffsetDiff = EltOffset - AlignDownOffset;
    Align AdjustedAlign = commonAlignment(
        KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);
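    // Sub-dword example: an i16 at EltOffset 38 gives AlignDownOffset = 36
    // and OffsetDiff = 2, so the lowering below loads an i32 at offset 36 and
    // shifts the value right by 16 bits to recover the argument.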
    Value *ArgPtr;
    Type *AdjustedArgTy;
    if (DoShiftOpt) { // FIXME: Handle aggregate types
      // Since we don't have sub-dword scalar loads, avoid doing an extload by
      // loading earlier than the argument address, and extracting the
      // relevant bits.
      // TODO: Update this for GFX12 which does have scalar sub-dword loads.
      //
      // Additionally widen any sub-dword load to i32 even if suitably aligned,
      // so that CSE between different argument loads works easily.
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          Arg.getName() + ".kernarg.offset.align.down");
      AdjustedArgTy = Builder.getInt32Ty();
    } else {
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".kernarg.offset");
      AdjustedArgTy = ArgTy;
    }
    if (IsV3 && Size >= 32) {
      V4Ty = FixedVectorType::get(VT->getElementType(), 4);
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
      AdjustedArgTy = V4Ty;
    }

    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);
    if (Arg.hasAttribute(Attribute::NoUndef))
      Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));

    if (Arg.hasAttribute(Attribute::Range)) {
      const ConstantRange &Range =
          Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
      Load->setMetadata(LLVMContext::MD_range,
                        MDB.createRange(Range.getLower(), Range.getUpper()));
    }
    if (isa<PointerType>(ArgTy)) {
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0) {
        Load->setMetadata(
            LLVMContext::MD_dereferenceable,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(
                            Builder.getInt64Ty(), DerefBytes))));
      }

      uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0) {
        Load->setMetadata(
            LLVMContext::MD_dereferenceable_or_null,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(
                            Builder.getInt64Ty(), DerefOrNullBytes))));
      }

      if (MaybeAlign ParamAlign = Arg.getParamAlign()) {
        Load->setMetadata(
            LLVMContext::MD_align,
            MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
                                 Builder.getInt64Ty(), ParamAlign->value()))));
      }
    }
    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      Value *ExtractBits = OffsetDiff == 0
                               ? Load
                               : Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal =
          Builder.CreateBitCast(Trunc, ArgTy, Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
    } else if (IsV3) {
      Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 2},
                                                Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
    } else {
      Load->setName(Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Load);
    }
  }
  KernArgSegment->addRetAttr(
      Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  if (InPreloadSequence) {
    uint64_t ImplicitArgsBaseOffset =
        alignTo(ExplicitArgOffset, ST.getAlignmentForImplicitArgPtr()) +
        BaseOffset;
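    // e.g., explicit args ending at byte 20 with an 8-byte implicit-arg
    // alignment place the hidden-argument block at BaseOffset + 24.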
    PreloadInfo.tryAllocImplicitArgPreloadSGPRs(ImplicitArgsBaseOffset,
                                                ExplicitArgOffset, Builder);
  }

  return true;
}
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  auto &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  return lowerKernelArguments(F, TM);
}

INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE,
                      "AMDGPU Lower Kernel Arguments", false, false)
INITIALIZE_PASS_END(AMDGPULowerKernelArguments, DEBUG_TYPE,
                    "AMDGPU Lower Kernel Arguments", false, false)
char AMDGPULowerKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
  return new AMDGPULowerKernelArguments();
}
PreservedAnalyses
AMDGPULowerKernelArgumentsPass::run(Function &F, FunctionAnalysisManager &AM) {
  bool Changed = lowerKernelArguments(F, TM);
  if (Changed) {
    // TODO: Preserves a lot more.
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }

  return PreservedAnalyses::all();
}