Movatterモバイル変換

Go to the documentation of this file.

1//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

9#include "SIMachineFunctionInfo.h"

10#include "AMDGPUSubtarget.h"

11#include "GCNSubtarget.h"

12#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

13#include "SIRegisterInfo.h"

14#include "Utils/AMDGPUBaseInfo.h"

15#include "llvm/CodeGen/LiveIntervals.h"

16#include "llvm/CodeGen/MIRParser/MIParser.h"

17#include "llvm/CodeGen/MachineBasicBlock.h"

18#include "llvm/CodeGen/MachineFrameInfo.h"

19#include "llvm/CodeGen/MachineFunction.h"

20#include "llvm/CodeGen/MachineRegisterInfo.h"

21#include "llvm/IR/CallingConv.h"

22#include "llvm/IR/DiagnosticInfo.h"

23#include "llvm/IR/Function.h"

24#include <cassert>

25#include <optional>

26#include <vector>

28enum {MAX_LANES = 64 };

30using namespacellvm;

32constGCNTargetMachine &getTM(constGCNSubtarget *STI) {

33constSITargetLowering *TLI = STI->getTargetLowering();

34returnstatic_cast<constGCNTargetMachine &>(TLI->getTargetMachine());

35}

37SIMachineFunctionInfo::SIMachineFunctionInfo(constFunction &F,

38constGCNSubtarget *STI)

39 :AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),

40 UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),

41 WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),

42 PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),

43 WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),

44 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {

45constGCNSubtarget &ST = *static_cast<constGCNSubtarget *>(STI);

46 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);

47 WavesPerEU = ST.getWavesPerEU(F);

48 MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);

49assert(MaxNumWorkGroups.size() == 3);

51 Occupancy = ST.computeOccupancy(F,getLDSSize()).second;

52CallingConv::ID CC =F.getCallingConv();

54 VRegFlags.reserve(1024);

56constbool IsKernel =CC ==CallingConv::AMDGPU_KERNEL ||

57CC ==CallingConv::SPIR_KERNEL;

59if (IsKernel) {

60 WorkGroupIDX =true;

61 WorkItemIDX =true;

62 }elseif (CC ==CallingConv::AMDGPU_PS) {

63 PSInputAddr =AMDGPU::getInitialPSInputAddr(F);

64 }

66 MayNeedAGPRs = ST.hasMAIInsts();

68if (AMDGPU::isChainCC(CC)) {

69// Chain functions don't receive an SP from their caller, but are free to

70// set one up. For now, we can use s32 to match what amdgpu_gfx functions

71// would use if called, but this can be revisited.

72// FIXME: Only reserve this if we actually need it.

73 StackPtrOffsetReg = AMDGPU::SGPR32;

75 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

77ArgInfo.PrivateSegmentBuffer =

78ArgDescriptor::createRegister(ScratchRSrcReg);

80 ImplicitArgPtr =false;

81 }elseif (!isEntryFunction()) {

82if (CC !=CallingConv::AMDGPU_Gfx)

83ArgInfo =AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

85 FrameOffsetReg = AMDGPU::SGPR33;

86 StackPtrOffsetReg = AMDGPU::SGPR32;

88if (!ST.enableFlatScratch()) {

89// Non-entry functions have no special inputs for now, other registers

90// required for scratch access.

91 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

93ArgInfo.PrivateSegmentBuffer =

94ArgDescriptor::createRegister(ScratchRSrcReg);

95 }

97if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))

98 ImplicitArgPtr =true;

99 }else {

100 ImplicitArgPtr =false;

101MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),

102MaxKernArgAlign);

103

104if (ST.hasGFX90AInsts() &&

105 ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&

106 !mayUseAGPRs(F))

107 MayNeedAGPRs =false;// We will select all MAI with VGPR operands.

108 }

109

110if (!AMDGPU::isGraphics(CC) ||

111 ((CC ==CallingConv::AMDGPU_CS ||CC ==CallingConv::AMDGPU_Gfx) &&

112 ST.hasArchitectedSGPRs())) {

113if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))

114 WorkGroupIDX =true;

115

116if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))

117 WorkGroupIDY =true;

118

119if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))

120 WorkGroupIDZ =true;

121 }

122

123if (!AMDGPU::isGraphics(CC)) {

124if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))

125 WorkItemIDX =true;

126

127if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&

128 ST.getMaxWorkitemID(F, 1) != 0)

129 WorkItemIDY =true;

130

131if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&

132 ST.getMaxWorkitemID(F, 2) != 0)

133 WorkItemIDZ =true;

134

135if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))

136 LDSKernelId =true;

137 }

138

139if (isEntryFunction()) {

140// X, XY, and XYZ are the only supported combinations, so make sure Y is

141// enabled if Z is.

142if (WorkItemIDZ)

143 WorkItemIDY =true;

144

145if (!ST.flatScratchIsArchitected()) {

146 PrivateSegmentWaveByteOffset =true;

147

148// HS and GS always have the scratch wave offset in SGPR5 on GFX9.

149if (ST.getGeneration() >=AMDGPUSubtarget::GFX9 &&

150 (CC ==CallingConv::AMDGPU_HS ||CC ==CallingConv::AMDGPU_GS))

151ArgInfo.PrivateSegmentWaveByteOffset =

152ArgDescriptor::createRegister(AMDGPU::SGPR5);

153 }

154 }

155

156Attribute A =F.getFnAttribute("amdgpu-git-ptr-high");

157StringRef S =A.getValueAsString();

158if (!S.empty())

159 S.consumeInteger(0, GITPtrHigh);

160

161A =F.getFnAttribute("amdgpu-32bit-address-high-bits");

162 S =A.getValueAsString();

163if (!S.empty())

164 S.consumeInteger(0, HighBitsOf32BitAddress);

165

166 MaxMemoryClusterDWords =F.getFnAttributeAsParsedInteger(

167"amdgpu-max-memory-cluster-dwords",DefaultMemoryClusterDWordsLimit);

168

169// On GFX908, in order to guarantee copying between AGPRs, we need a scratch

170// VGPR available at all times. For now, reserve highest available VGPR. After

171// RA, shift it to the lowest available unused VGPR if the one exist.

172if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {

173 VGPRForAGPRCopy =

174 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);

175 }

176}

177

178MachineFunctionInfo *SIMachineFunctionInfo::clone(

179BumpPtrAllocator &Allocator,MachineFunction &DestMF,

180constDenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)

181 const{

182return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);

183}

184

185voidSIMachineFunctionInfo::limitOccupancy(constMachineFunction &MF) {

186limitOccupancy(getMaxWavesPerEU());

187constGCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();

188limitOccupancy(ST.getOccupancyWithWorkGroupSizes(MF).second);

189}

190

191Register SIMachineFunctionInfo::addPrivateSegmentBuffer(

192constSIRegisterInfo &TRI) {

193ArgInfo.PrivateSegmentBuffer =

194ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

195 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));

196 NumUserSGPRs += 4;

197returnArgInfo.PrivateSegmentBuffer.getRegister();

198}

199

200Register SIMachineFunctionInfo::addDispatchPtr(constSIRegisterInfo &TRI) {

201ArgInfo.DispatchPtr =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

202 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

203 NumUserSGPRs += 2;

204returnArgInfo.DispatchPtr.getRegister();

205}

206

207Register SIMachineFunctionInfo::addQueuePtr(constSIRegisterInfo &TRI) {

208ArgInfo.QueuePtr =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

209 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

210 NumUserSGPRs += 2;

211returnArgInfo.QueuePtr.getRegister();

212}

213

214Register SIMachineFunctionInfo::addKernargSegmentPtr(constSIRegisterInfo &TRI) {

215ArgInfo.KernargSegmentPtr

216 =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

217 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

218 NumUserSGPRs += 2;

219returnArgInfo.KernargSegmentPtr.getRegister();

220}

221

222Register SIMachineFunctionInfo::addDispatchID(constSIRegisterInfo &TRI) {

223ArgInfo.DispatchID =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

224 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

225 NumUserSGPRs += 2;

226returnArgInfo.DispatchID.getRegister();

227}

228

229Register SIMachineFunctionInfo::addFlatScratchInit(constSIRegisterInfo &TRI) {

230ArgInfo.FlatScratchInit =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

231 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

232 NumUserSGPRs += 2;

233returnArgInfo.FlatScratchInit.getRegister();

234}

235

236Register SIMachineFunctionInfo::addPrivateSegmentSize(constSIRegisterInfo &TRI) {

237ArgInfo.PrivateSegmentSize =ArgDescriptor::createRegister(getNextUserSGPR());

238 NumUserSGPRs += 1;

239returnArgInfo.PrivateSegmentSize.getRegister();

240}

241

242Register SIMachineFunctionInfo::addImplicitBufferPtr(constSIRegisterInfo &TRI) {

243ArgInfo.ImplicitBufferPtr =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(

244 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

245 NumUserSGPRs += 2;

246returnArgInfo.ImplicitBufferPtr.getRegister();

247}

248

249Register SIMachineFunctionInfo::addLDSKernelId() {

250ArgInfo.LDSKernelId =ArgDescriptor::createRegister(getNextUserSGPR());

251 NumUserSGPRs += 1;

252returnArgInfo.LDSKernelId.getRegister();

253}

254

255SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(

256constSIRegisterInfo &TRI,constTargetRegisterClass *RC,

257unsigned AllocSizeDWord,int KernArgIdx,int PaddingSGPRs) {

258assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&

259"Preload kernel argument allocated twice.");

260 NumUserSGPRs += PaddingSGPRs;

261// If the available register tuples are aligned with the kernarg to be

262// preloaded use that register, otherwise we need to use a set of SGPRs and

263// merge them.

264if (!ArgInfo.FirstKernArgPreloadReg)

265ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();

266Register PreloadReg =

267TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);

268if (PreloadReg &&

269 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {

270ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);

271 NumUserSGPRs += AllocSizeDWord;

272 }else {

273for (unsignedI = 0;I < AllocSizeDWord; ++I) {

274ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());

275 NumUserSGPRs++;

276 }

277 }

278

279// Track the actual number of SGPRs that HW will preload to.

280 UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);

281return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;

282}

283

284voidSIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF,Register VGPR,

285uint64_t Size,Align Alignment) {

286// Skip if it is an entry function or the register is already added.

287if (isEntryFunction() || WWMSpills.count(VGPR))

288return;

289

290// Skip if this is a function with the amdgpu_cs_chain or

291// amdgpu_cs_chain_preserve calling convention and this is a scratch register.

292// We never need to allocate a spill for these because we don't even need to

293// restore the inactive lanes for them (they're scratchier than the usual

294// scratch registers). We only need to do this if we have calls to

295// llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since

296// chain functions do not return) and the function did not contain a call to

297// llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes

298// when entering the function).

299if (isChainFunction() &&

300 (SIRegisterInfo::isChainScratchRegister(VGPR) ||

301 !MF.getFrameInfo().hasTailCall() ||hasInitWholeWave()))

302return;

303

304 WWMSpills.insert(std::make_pair(

305 VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));

306}

307

308// Separate out the callee-saved and scratch registers.

309voidSIMachineFunctionInfo::splitWWMSpillRegisters(

310MachineFunction &MF,

311SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,

312SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const{

313constMCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();

314for (auto &Reg : WWMSpills) {

315if (isCalleeSavedReg(CSRegs, Reg.first))

316 CalleeSavedRegs.push_back(Reg);

317else

318 ScratchRegs.push_back(Reg);

319 }

320}

321

322boolSIMachineFunctionInfo::isCalleeSavedReg(constMCPhysReg *CSRegs,

323MCPhysReg Reg) const{

324for (unsignedI = 0; CSRegs[I]; ++I) {

325if (CSRegs[I] == Reg)

326returntrue;

327 }

328

329returnfalse;

330}

331

332voidSIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(

333MachineFunction &MF,SmallVectorImpl<Register> &WWMVGPRs,

334BitVector &SavedVGPRs) {

335constSIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

336MachineRegisterInfo &MRI = MF.getRegInfo();

337for (unsignedI = 0, E = WWMVGPRs.size();I < E; ++I) {

338Register Reg = WWMVGPRs[I];

339Register NewReg =

340TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);

341if (!NewReg || NewReg >= Reg)

342break;

343

344MRI.replaceRegWith(Reg, NewReg);

345

346// Update various tables with the new VGPR.

347 WWMVGPRs[I] = NewReg;

348 WWMReservedRegs.remove(Reg);

349 WWMReservedRegs.insert(NewReg);

350MRI.reserveReg(NewReg,TRI);

351

352// Replace the register in SpillPhysVGPRs. This is needed to look for free

353// lanes while spilling special SGPRs like FP, BP, etc. during PEI.

354auto *RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);

355if (RegItr != SpillPhysVGPRs.end()) {

356unsignedIdx = std::distance(SpillPhysVGPRs.begin(), RegItr);

357 SpillPhysVGPRs[Idx] = NewReg;

358 }

359

360// The generic `determineCalleeSaves` might have set the old register if it

361// is in the CSR range.

362 SavedVGPRs.reset(Reg);

363

364for (MachineBasicBlock &MBB : MF) {

365MBB.removeLiveIn(Reg);

366MBB.sortUniqueLiveIns();

367 }

368

369 Reg = NewReg;

370 }

371}

372

373bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(

374MachineFunction &MF,int FI,unsigned LaneIndex) {

375MachineRegisterInfo &MRI = MF.getRegInfo();

376Register LaneVGPR;

377if (!LaneIndex) {

378 LaneVGPR =MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

379 SpillVGPRs.push_back(LaneVGPR);

380 }else {

381 LaneVGPR = SpillVGPRs.back();

382 }

383

384 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);

385returntrue;

386}

387

388bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(

389MachineFunction &MF,int FI,unsigned LaneIndex,bool IsPrologEpilog) {

390constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

391constSIRegisterInfo *TRI =ST.getRegisterInfo();

392MachineRegisterInfo &MRI = MF.getRegInfo();

393Register LaneVGPR;

394if (!LaneIndex) {

395// Find the highest available register if called before RA to ensure the

396// lowest registers are available for allocation. The LaneVGPR, in that

397// case, will be shifted back to the lowest range after VGPR allocation.

398 LaneVGPR =TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,

399 !IsPrologEpilog);

400if (LaneVGPR == AMDGPU::NoRegister) {

401// We have no VGPRs left for spilling SGPRs. Reset because we will not

402// partially spill the SGPR to VGPRs.

403 SGPRSpillsToPhysicalVGPRLanes.erase(FI);

404returnfalse;

405 }

406

407if (IsPrologEpilog)

408allocateWWMSpill(MF, LaneVGPR);

409

410reserveWWMRegister(LaneVGPR);

411for (MachineBasicBlock &MBB : MF) {

412MBB.addLiveIn(LaneVGPR);

413MBB.sortUniqueLiveIns();

414 }

415 SpillPhysVGPRs.push_back(LaneVGPR);

416 }else {

417 LaneVGPR = SpillPhysVGPRs.back();

418 }

419

420 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);

421returntrue;

422}

423

424boolSIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(

425MachineFunction &MF,int FI,bool SpillToPhysVGPRLane,

426bool IsPrologEpilog) {

427 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =

428 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]

429 : SGPRSpillsToVirtualVGPRLanes[FI];

430

431// This has already been allocated.

432if (!SpillLanes.empty())

433returntrue;

434

435constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

436MachineFrameInfo &FrameInfo = MF.getFrameInfo();

437unsigned WaveSize = ST.getWavefrontSize();

438

439unsignedSize = FrameInfo.getObjectSize(FI);

440unsigned NumLanes =Size / 4;

441

442if (NumLanes > WaveSize)

443returnfalse;

444

445assert(Size >= 4 &&"invalid sgpr spill size");

446assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&

447"not spilling SGPRs to VGPRs");

448

449unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes

450 : NumVirtualVGPRSpillLanes;

451

452for (unsignedI = 0;I < NumLanes; ++I, ++NumSpillLanes) {

453unsigned LaneIndex = (NumSpillLanes % WaveSize);

454

455bool Allocated = SpillToPhysVGPRLane

456 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,

457 IsPrologEpilog)

458 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);

459if (!Allocated) {

460 NumSpillLanes -=I;

461returnfalse;

462 }

463 }

464

465returntrue;

466}

467

468/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.

469/// Either AGPR is spilled to VGPR to vice versa.

470/// Returns true if a \p FI can be eliminated completely.

471boolSIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,

472int FI,

473bool isAGPRtoVGPR) {

474MachineRegisterInfo &MRI = MF.getRegInfo();

475MachineFrameInfo &FrameInfo = MF.getFrameInfo();

476constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

477

478assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

479

480auto &Spill = VGPRToAGPRSpills[FI];

481

482// This has already been allocated.

483if (!Spill.Lanes.empty())

484return Spill.FullyAllocated;

485

486unsignedSize = FrameInfo.getObjectSize(FI);

487unsigned NumLanes =Size / 4;

488 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

489

490constTargetRegisterClass &RC =

491 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;

492auto Regs = RC.getRegisters();

493

494auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;

495constSIRegisterInfo *TRI = ST.getRegisterInfo();

496 Spill.FullyAllocated =true;

497

498// FIXME: Move allocation logic out of MachineFunctionInfo and initialize

499// once.

500BitVector OtherUsedRegs;

501 OtherUsedRegs.resize(TRI->getNumRegs());

502

503constuint32_t *CSRMask =

504TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());

505if (CSRMask)

506 OtherUsedRegs.setBitsInMask(CSRMask);

507

508// TODO: Should include register tuples, but doesn't matter with current

509// usage.

510for (MCPhysReg Reg : SpillAGPR)

511 OtherUsedRegs.set(Reg);

512for (MCPhysReg Reg : SpillVGPR)

513 OtherUsedRegs.set(Reg);

514

515SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();

516for (intI = NumLanes - 1;I >= 0; --I) {

517 NextSpillReg = std::find_if(

518 NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {

519 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&

520 !OtherUsedRegs[Reg];

521 });

522

523if (NextSpillReg == Regs.end()) {// Registers exhausted

524 Spill.FullyAllocated =false;

525break;

526 }

527

528 OtherUsedRegs.set(*NextSpillReg);

529 SpillRegs.push_back(*NextSpillReg);

530MRI.reserveReg(*NextSpillReg,TRI);

531 Spill.Lanes[I] = *NextSpillReg++;

532 }

533

534return Spill.FullyAllocated;

535}

536

537boolSIMachineFunctionInfo::removeDeadFrameIndices(

538MachineFrameInfo &MFI,bool ResetSGPRSpillStackIDs) {

539// Remove dead frame indices from function frame, however keep FP & BP since

540// spills for them haven't been inserted yet. And also make sure to remove the

541// frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,

542// otherwise, it could result in an unexpected side effect and bug, in case of

543// any re-mapping of freed frame indices by later pass(es) like "stack slot

544// coloring".

545for (auto &R :make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {

546 MFI.RemoveStackObject(R.first);

547 SGPRSpillsToVirtualVGPRLanes.erase(R.first);

548 }

549

550// Remove the dead frame indices of CSR SGPRs which are spilled to physical

551// VGPR lanes during SILowerSGPRSpills pass.

552if (!ResetSGPRSpillStackIDs) {

553for (auto &R :make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {

554 MFI.RemoveStackObject(R.first);

555 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);

556 }

557 }

558bool HaveSGPRToMemory =false;

559

560if (ResetSGPRSpillStackIDs) {

561// All other SGPRs must be allocated on the default stack, so reset the

562// stack ID.

563for (intI = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();I != E;

564 ++I) {

565if (!checkIndexInPrologEpilogSGPRSpills(I)) {

566if (MFI.getStackID(I) ==TargetStackID::SGPRSpill) {

567 MFI.setStackID(I,TargetStackID::Default);

568 HaveSGPRToMemory =true;

569 }

570 }

571 }

572 }

573

574for (auto &R : VGPRToAGPRSpills) {

575if (R.second.IsDead)

576 MFI.RemoveStackObject(R.first);

577 }

578

579return HaveSGPRToMemory;

580}

581

582intSIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,

583constSIRegisterInfo &TRI) {

584if (ScavengeFI)

585return *ScavengeFI;

586

587 ScavengeFI =

588 MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),

589TRI.getSpillAlign(AMDGPU::SGPR_32RegClass),false);

590return *ScavengeFI;

591}

592

593MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const{

594assert(NumSystemSGPRs == 0 &&"System SGPRs must be added after user SGPRs");

595return AMDGPU::SGPR0 + NumUserSGPRs;

596}

597

598MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const{

599return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;

600}

601

602void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {

603 VRegFlags.grow(Reg);

604}

605

606void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,

607Register SrcReg) {

608 VRegFlags.grow(NewReg);

609 VRegFlags[NewReg] = VRegFlags[SrcReg];

610}

611

612Register

613SIMachineFunctionInfo::getGITPtrLoReg(constMachineFunction &MF) const{

614constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

615if (!ST.isAmdPalOS())

616returnRegister();

617Register GitPtrLo = AMDGPU::SGPR0;// Low GIT address passed in

618if (ST.hasMergedShaders()) {

619switch (MF.getFunction().getCallingConv()) {

620caseCallingConv::AMDGPU_HS:

621caseCallingConv::AMDGPU_GS:

622// Low GIT address is passed in s8 rather than s0 for an LS+HS or

623// ES+GS merged shader on gfx9+.

624 GitPtrLo = AMDGPU::SGPR8;

625return GitPtrLo;

626default:

627return GitPtrLo;

628 }

629 }

630return GitPtrLo;

631}

632

633staticyaml::StringValue regToString(Register Reg,

634constTargetRegisterInfo &TRI) {

635yaml::StringValue Dest;

636 {

637raw_string_ostream OS(Dest.Value);

638OS <<printReg(Reg, &TRI);

639 }

640return Dest;

641}

642

643static std::optional<yaml::SIArgumentInfo>

644convertArgumentInfo(constAMDGPUFunctionArgInfo &ArgInfo,

645constTargetRegisterInfo &TRI) {

646yaml::SIArgumentInfo AI;

647

648auto convertArg = [&](std::optional<yaml::SIArgument> &A,

649constArgDescriptor &Arg) {

650if (!Arg)

651returnfalse;

652

653// Create a register or stack argument.

654yaml::SIArgument SA =yaml::SIArgument::createArgument(Arg.isRegister());

655if (Arg.isRegister()) {

656raw_string_ostream OS(SA.RegisterName.Value);

657OS <<printReg(Arg.getRegister(), &TRI);

658 }else

659 SA.StackOffset = Arg.getStackOffset();

660// Check and update the optional mask.

661if (Arg.isMasked())

662 SA.Mask = Arg.getMask();

663

664A = SA;

665returntrue;

666 };

667

668// TODO: Need to serialize kernarg preloads.

669boolAny =false;

670Any |= convertArg(AI.PrivateSegmentBuffer,ArgInfo.PrivateSegmentBuffer);

671Any |= convertArg(AI.DispatchPtr,ArgInfo.DispatchPtr);

672Any |= convertArg(AI.QueuePtr,ArgInfo.QueuePtr);

673Any |= convertArg(AI.KernargSegmentPtr,ArgInfo.KernargSegmentPtr);

674Any |= convertArg(AI.DispatchID,ArgInfo.DispatchID);

675Any |= convertArg(AI.FlatScratchInit,ArgInfo.FlatScratchInit);

676Any |= convertArg(AI.LDSKernelId,ArgInfo.LDSKernelId);

677Any |= convertArg(AI.PrivateSegmentSize,ArgInfo.PrivateSegmentSize);

678Any |= convertArg(AI.WorkGroupIDX,ArgInfo.WorkGroupIDX);

679Any |= convertArg(AI.WorkGroupIDY,ArgInfo.WorkGroupIDY);

680Any |= convertArg(AI.WorkGroupIDZ,ArgInfo.WorkGroupIDZ);

681Any |= convertArg(AI.WorkGroupInfo,ArgInfo.WorkGroupInfo);

682Any |= convertArg(AI.PrivateSegmentWaveByteOffset,

683ArgInfo.PrivateSegmentWaveByteOffset);

684Any |= convertArg(AI.ImplicitArgPtr,ArgInfo.ImplicitArgPtr);

685Any |= convertArg(AI.ImplicitBufferPtr,ArgInfo.ImplicitBufferPtr);

686Any |= convertArg(AI.WorkItemIDX,ArgInfo.WorkItemIDX);

687Any |= convertArg(AI.WorkItemIDY,ArgInfo.WorkItemIDY);

688Any |= convertArg(AI.WorkItemIDZ,ArgInfo.WorkItemIDZ);

689

690if (Any)

691return AI;

692

693return std::nullopt;

694}

695

696yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(

697constllvm::SIMachineFunctionInfo &MFI,constTargetRegisterInfo &TRI,

698constllvm::MachineFunction &MF)

699 : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),

700 MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),

701 GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()),

702 IsEntryFunction(MFI.isEntryFunction()),

703 NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),

704 MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),

705 HasSpilledSGPRs(MFI.hasSpilledSGPRs()),

706 HasSpilledVGPRs(MFI.hasSpilledVGPRs()),

707 HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),

708 Occupancy(MFI.getOccupancy()),

709 ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(),TRI)),

710 FrameOffsetReg(regToString(MFI.getFrameOffsetReg(),TRI)),

711 StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(),TRI)),

712 BytesInStackArgArea(MFI.getBytesInStackArgArea()),

713 ReturnsVoid(MFI.returnsVoid()),

714ArgInfo(convertArgumentInfo(MFI.getArgInfo(),TRI)),

715 PSInputAddr(MFI.getPSInputAddr()), PSInputEnable(MFI.getPSInputEnable()),

716 MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()),

717 Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()) {

718for (Register Reg : MFI.getSGPRSpillPhysVGPRs())

719SpillPhysVGPRS.push_back(regToString(Reg,TRI));

720

721for (Register Reg : MFI.getWWMReservedRegs())

722WWMReservedRegs.push_back(regToString(Reg,TRI));

723

724if (MFI.getLongBranchReservedReg())

725LongBranchReservedReg =regToString(MFI.getLongBranchReservedReg(),TRI);

726if (MFI.getVGPRForAGPRCopy())

727VGPRForAGPRCopy =regToString(MFI.getVGPRForAGPRCopy(),TRI);

728

729if (MFI.getSGPRForEXECCopy())

730SGPRForEXECCopy =regToString(MFI.getSGPRForEXECCopy(),TRI);

731

732auto SFI = MFI.getOptionalScavengeFI();

733if (SFI)

734ScavengeFI =yaml::FrameIndex(*SFI, MF.getFrameInfo());

735}

736

737voidyaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {

738MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);

739}

740

741boolSIMachineFunctionInfo::initializeBaseYamlFields(

742constyaml::SIMachineFunctionInfo &YamlMFI,constMachineFunction &MF,

743PerFunctionMIParsingState &PFS,SMDiagnostic &Error,SMRange &SourceRange) {

744ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;

745MaxKernArgAlign = YamlMFI.MaxKernArgAlign;

746LDSSize = YamlMFI.LDSSize;

747GDSSize = YamlMFI.GDSSize;

748DynLDSAlign = YamlMFI.DynLDSAlign;

749PSInputAddr = YamlMFI.PSInputAddr;

750PSInputEnable = YamlMFI.PSInputEnable;

751MaxMemoryClusterDWords = YamlMFI.MaxMemoryClusterDWords;

752HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;

753Occupancy = YamlMFI.Occupancy;

754IsEntryFunction = YamlMFI.IsEntryFunction;

755NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;

756MemoryBound = YamlMFI.MemoryBound;

757WaveLimiter = YamlMFI.WaveLimiter;

758HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;

759HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

760BytesInStackArgArea = YamlMFI.BytesInStackArgArea;

761ReturnsVoid = YamlMFI.ReturnsVoid;

762

763if (YamlMFI.ScavengeFI) {

764auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());

765if (!FIOrErr) {

766// Create a diagnostic for a the frame index.

767constMemoryBuffer &Buffer =

768 *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

769

770Error =SMDiagnostic(*PFS.SM,SMLoc(), Buffer.getBufferIdentifier(), 1, 1,

771SourceMgr::DK_Error,toString(FIOrErr.takeError()),

772"", {}, {});

773 SourceRange = YamlMFI.ScavengeFI->SourceRange;

774returntrue;

775 }

776ScavengeFI = *FIOrErr;

777 }else {

778ScavengeFI = std::nullopt;

779 }

780returnfalse;

781}

782

783boolSIMachineFunctionInfo::mayUseAGPRs(constFunction &F) const{

784return !F.hasFnAttribute("amdgpu-no-agpr");

785}

786

787boolSIMachineFunctionInfo::usesAGPRs(constMachineFunction &MF) const{

788if (UsesAGPRs)

789return *UsesAGPRs;

790

791if (!mayNeedAGPRs()) {

792 UsesAGPRs =false;

793returnfalse;

794 }

795

796if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||

797 MF.getFrameInfo().hasCalls()) {

798 UsesAGPRs =true;

799returntrue;

800 }

801

802constMachineRegisterInfo &MRI = MF.getRegInfo();

803

804for (unsignedI = 0, E =MRI.getNumVirtRegs();I != E; ++I) {

805constRegister Reg =Register::index2VirtReg(I);

806constTargetRegisterClass *RC =MRI.getRegClassOrNull(Reg);

807if (RC &&SIRegisterInfo::isAGPRClass(RC)) {

808 UsesAGPRs =true;

809returntrue;

810 }

811if (!RC && !MRI.use_empty(Reg) &&MRI.getType(Reg).isValid()) {

812// Defer caching UsesAGPRs, function might not yet been regbank selected.

813returntrue;

814 }

815 }

816

817for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {

818if (MRI.isPhysRegUsed(Reg)) {

819 UsesAGPRs =true;

820returntrue;

821 }

822 }

823

824 UsesAGPRs =false;

825returnfalse;

826}

MRI

unsigned const MachineRegisterInfo * MRI

Definition:AArch64AdvSIMDScalarPass.cpp:105

AMDGPUBaseInfo.h

AMDGPUMCTargetDesc.h

Provides AMDGPU specific target descriptions.

AMDGPUSubtarget.h

Base class for AMDGPU specific classes of TargetSubtarget.

MBB

MachineBasicBlock & MBB

Definition:ARMSLSHardening.cpp:71

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

CallingConv.h

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

DiagnosticInfo.h

Size

uint64_t Size

Definition:ELFObjHandler.cpp:81

YamlIO

IO & YamlIO

Definition:ELFYAML.cpp:1314

GCNSubtarget.h

AMD GCN specific subclass of TargetSubtarget.

Function.h

LiveIntervals.h

#define F(x, y, z)

Definition:MD5.cpp:55

#define I(x, y, z)

Definition:MD5.cpp:58

MachineRegisterInfo.h

TRI

unsigned const TargetRegisterInfo * TRI

Definition:MachineSink.cpp:2029

auto CC

Definition:RISCVRedundantCopyElimination.cpp:79

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

MAX_LANES

@ MAX_LANES

Definition:SIMachineFunctionInfo.cpp:28

getTM

const GCNTargetMachine & getTM(const GCNSubtarget *STI)

Definition:SIMachineFunctionInfo.cpp:32

convertArgumentInfo

static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)

Definition:SIMachineFunctionInfo.cpp:644

regToString

static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)

Definition:SIMachineFunctionInfo.cpp:633

SIMachineFunctionInfo.h

SIRegisterInfo.h

Interface definition for SIRegisterInfo.

raw_pwrite_stream & OS

Definition:SampleProfWriter.cpp:51

llvm::AMDGPUArgumentUsageInfo::FixedABIFunctionInfo

static const AMDGPUFunctionArgInfo FixedABIFunctionInfo

Definition:AMDGPUArgumentUsageInfo.h:182

llvm::AMDGPUMachineFunction

Definition:AMDGPUMachineFunction.h:24

llvm::AMDGPUMachineFunction::getLDSSize

uint32_t getLDSSize() const

Definition:AMDGPUMachineFunction.h:81

llvm::AMDGPUMachineFunction::MaxKernArgAlign

Align MaxKernArgAlign

Definition:AMDGPUMachineFunction.h:31

llvm::AMDGPUMachineFunction::isChainFunction

bool isChainFunction() const

Definition:AMDGPUMachineFunction.h:95

llvm::AMDGPUMachineFunction::hasInitWholeWave

bool hasInitWholeWave() const

Definition:AMDGPUMachineFunction.h:114

llvm::AMDGPUMachineFunction::isEntryFunction

bool isEntryFunction() const

Definition:AMDGPUMachineFunction.h:89

llvm::AMDGPUSubtarget::GFX9

@ GFX9

Definition:AMDGPUSubtarget.h:40

llvm::Any

Definition:Any.h:28

llvm::Attribute

Definition:Attributes.h:67

llvm::BitVector

Definition:BitVector.h:82

llvm::BitVector::reset

BitVector & reset()

Definition:BitVector.h:392

llvm::BitVector::resize

void resize(unsigned N, bool t=false)

resize - Grow or shrink the bitvector.

Definition:BitVector.h:341

llvm::BitVector::set

BitVector & set()

Definition:BitVector.h:351

llvm::BitVector::setBitsInMask

void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)

setBitsInMask - Add '1' bits from Mask to this vector.

Definition:BitVector.h:707

llvm::BitVector::push_back

void push_back(bool Val)

Definition:BitVector.h:466

llvm::BumpPtrAllocatorImpl

Allocate memory in an ever growing pool, as if by bump-pointer.

Definition:Allocator.h:66

llvm::DenseMap

Definition:DenseMap.h:727

llvm::Error

Lightweight error class with error context and mandatory checking.

Definition:Error.h:160

llvm::Function

Definition:Function.h:63

llvm::Function::getCallingConv

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

Definition:Function.h:277

llvm::GCNSubtarget

Definition:GCNSubtarget.h:34

llvm::GCNSubtarget::getTargetLowering

const SITargetLowering * getTargetLowering() const override

Definition:GCNSubtarget.h:287

llvm::GCNTargetMachine

Definition:AMDGPUTargetMachine.h:80

llvm::GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs

void allocKernargPreloadSGPRs(unsigned NumSGPRs)

Definition:GCNSubtarget.cpp:668

llvm::MCRegister

Wrapper class representing physical registers. Should be passed by value.

Definition:MCRegister.h:33

llvm::MachineBasicBlock

Definition:MachineBasicBlock.h:125

llvm::MachineBasicBlock::removeLiveIn

void removeLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll())

Remove the specified register from the live in set.

Definition:MachineBasicBlock.cpp:600

llvm::MachineBasicBlock::sortUniqueLiveIns

void sortUniqueLiveIns()

Sorts and uniques the LiveIns vector.

Definition:MachineBasicBlock.cpp:624

llvm::MachineBasicBlock::addLiveIn

void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())

Adds the specified register as a live in.

Definition:MachineBasicBlock.h:456

llvm::MachineFrameInfo

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

Definition:MachineFrameInfo.h:106

llvm::MachineFrameInfo::CreateStackObject

int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)

Create a new statically sized stack object, returning a nonnegative identifier to represent it.

Definition:MachineFrameInfo.cpp:51

llvm::MachineFrameInfo::hasCalls

bool hasCalls() const

Return true if the current function has any function calls.

Definition:MachineFrameInfo.h:621

llvm::MachineFrameInfo::CreateSpillStackObject

int CreateSpillStackObject(uint64_t Size, Align Alignment)

Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...

Definition:MachineFrameInfo.cpp:66

llvm::MachineFrameInfo::setStackID

void setStackID(int ObjectIdx, uint8_t ID)

Definition:MachineFrameInfo.h:755

llvm::MachineFrameInfo::hasTailCall

bool hasTailCall() const

Returns true if the function contains a tail call.

Definition:MachineFrameInfo.h:646

llvm::MachineFrameInfo::isSpillSlotObjectIndex

bool isSpillSlotObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a spill slot.

Definition:MachineFrameInfo.h:737

llvm::MachineFrameInfo::getObjectSize

int64_t getObjectSize(int ObjectIdx) const

Return the size of the specified object.

Definition:MachineFrameInfo.h:472

llvm::MachineFrameInfo::RemoveStackObject

void RemoveStackObject(int ObjectIdx)

Remove or mark dead a statically sized stack object.

Definition:MachineFrameInfo.h:795

llvm::MachineFrameInfo::getObjectIndexEnd

int getObjectIndexEnd() const

Return one past the maximum frame object index.

Definition:MachineFrameInfo.h:412

llvm::MachineFrameInfo::getStackID

uint8_t getStackID(int ObjectIdx) const

Definition:MachineFrameInfo.h:750

llvm::MachineFrameInfo::getObjectIndexBegin

int getObjectIndexBegin() const

Return the minimum frame object index.

Definition:MachineFrameInfo.h:409

llvm::MachineFunction

Definition:MachineFunction.h:267

llvm::MachineFunction::getSubtarget

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Definition:MachineFunction.h:733

llvm::MachineFunction::getFrameInfo

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

Definition:MachineFunction.h:749

llvm::MachineFunction::getRegInfo

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Definition:MachineFunction.h:743

llvm::MachineFunction::getFunction

Function & getFunction()

Return the LLVM function that this machine code represents.

Definition:MachineFunction.h:704

llvm::MachineFunction::cloneInfo

Ty * cloneInfo(const Ty &Old)

Definition:MachineFunction.h:840

llvm::MachineRegisterInfo

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Definition:MachineRegisterInfo.h:51

llvm::MachineRegisterInfo::getCalleeSavedRegs

const MCPhysReg * getCalleeSavedRegs() const

Returns list of callee saved registers.

Definition:MachineRegisterInfo.cpp:635

llvm::MapVector::count

size_type count(const KeyT &Key) const

Definition:MapVector.h:165

llvm::MapVector::insert

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Definition:MapVector.h:141

llvm::MemoryBuffer

This interface provides simple read-only access to a block of memory, and provides simple methods for...

Definition:MemoryBuffer.h:51

llvm::MemoryBuffer::getBufferIdentifier

virtual StringRef getBufferIdentifier() const

Return an identifier for this buffer, typically the filename it was read from.

Definition:MemoryBuffer.h:76

llvm::Register

Wrapper class representing virtual and physical registers.

Definition:Register.h:19

llvm::Register::index2VirtReg

static Register index2VirtReg(unsigned Index)

Convert a 0-based index to a virtual register number.

Definition:Register.h:84

llvm::SIMachineFunctionInfo

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

Definition:SIMachineFunctionInfo.h:390

llvm::SIMachineFunctionInfo::usesAGPRs

bool usesAGPRs(const MachineFunction &MF) const

Definition:SIMachineFunctionInfo.cpp:787

llvm::SIMachineFunctionInfo::initializeBaseYamlFields

bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)

Definition:SIMachineFunctionInfo.cpp:741

llvm::SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange

void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)

Definition:SIMachineFunctionInfo.cpp:332

llvm::SIMachineFunctionInfo::addPrivateSegmentSize

Definition:SIMachineFunctionInfo.cpp:236

llvm::SIMachineFunctionInfo::allocateWWMSpill

void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))

Definition:SIMachineFunctionInfo.cpp:284

llvm::SIMachineFunctionInfo::addDispatchPtr

Definition:SIMachineFunctionInfo.cpp:200

llvm::SIMachineFunctionInfo::getLongBranchReservedReg

Definition:SIMachineFunctionInfo.h:973

llvm::SIMachineFunctionInfo::addFlatScratchInit

Definition:SIMachineFunctionInfo.cpp:229

llvm::SIMachineFunctionInfo::getMaxWavesPerEU

unsigned getMaxWavesPerEU() const

Definition:SIMachineFunctionInfo.h:1087

llvm::SIMachineFunctionInfo::getSGPRSpillPhysVGPRs

ArrayRef< Register > getSGPRSpillPhysVGPRs() const

Definition:SIMachineFunctionInfo.h:626

llvm::SIMachineFunctionInfo::getScavengeFI

int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)

Definition:SIMachineFunctionInfo.cpp:582

llvm::SIMachineFunctionInfo::addQueuePtr

Definition:SIMachineFunctionInfo.cpp:207

llvm::SIMachineFunctionInfo::SIMachineFunctionInfo

SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default

llvm::SIMachineFunctionInfo::getGITPtrLoReg

Definition:SIMachineFunctionInfo.cpp:613

llvm::SIMachineFunctionInfo::allocateVGPRSpillToAGPR

bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)

Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.

Definition:SIMachineFunctionInfo.cpp:471

llvm::SIMachineFunctionInfo::splitWWMSpillRegisters

void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const

Definition:SIMachineFunctionInfo.cpp:309

llvm::SIMachineFunctionInfo::getSGPRForEXECCopy

Definition:SIMachineFunctionInfo.h:738

llvm::SIMachineFunctionInfo::mayUseAGPRs

bool mayUseAGPRs(const Function &F) const

Definition:SIMachineFunctionInfo.cpp:783

llvm::SIMachineFunctionInfo::isCalleeSavedReg

bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const

Definition:SIMachineFunctionInfo.cpp:322

llvm::SIMachineFunctionInfo::addLDSKernelId

Definition:SIMachineFunctionInfo.cpp:249

llvm::SIMachineFunctionInfo::getVGPRForAGPRCopy

Definition:SIMachineFunctionInfo.h:581

llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane

bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)

Definition:SIMachineFunctionInfo.cpp:424

llvm::SIMachineFunctionInfo::addKernargSegmentPtr

Definition:SIMachineFunctionInfo.cpp:214

llvm::SIMachineFunctionInfo::addDispatchID

Definition:SIMachineFunctionInfo.cpp:222

llvm::SIMachineFunctionInfo::removeDeadFrameIndices

bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)

If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.

Definition:SIMachineFunctionInfo.cpp:537

llvm::SIMachineFunctionInfo::clone

MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override

Make a functionally equivalent copy of this MachineFunctionInfo in MF.

Definition:SIMachineFunctionInfo.cpp:178

llvm::SIMachineFunctionInfo::checkIndexInPrologEpilogSGPRSpills

bool checkIndexInPrologEpilogSGPRSpills(int FI) const

Definition:SIMachineFunctionInfo.h:683

llvm::SIMachineFunctionInfo::addPrivateSegmentBuffer

Definition:SIMachineFunctionInfo.cpp:191

llvm::SIMachineFunctionInfo::getWWMReservedRegs

const ReservedRegSet & getWWMReservedRegs() const

Definition:SIMachineFunctionInfo.h:629

llvm::SIMachineFunctionInfo::getOptionalScavengeFI

std::optional< int > getOptionalScavengeFI() const

Definition:SIMachineFunctionInfo.h:775

llvm::SIMachineFunctionInfo::addImplicitBufferPtr

Definition:SIMachineFunctionInfo.cpp:242

llvm::SIMachineFunctionInfo::limitOccupancy

void limitOccupancy(const MachineFunction &MF)

Definition:SIMachineFunctionInfo.cpp:185

llvm::SIMachineFunctionInfo::addPreloadedKernArg

SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)

Definition:SIMachineFunctionInfo.cpp:255

llvm::SIMachineFunctionInfo::reserveWWMRegister

void reserveWWMRegister(Register Reg)

Definition:SIMachineFunctionInfo.h:605

llvm::SIRegisterInfo

Definition:SIRegisterInfo.h:32

llvm::SIRegisterInfo::isChainScratchRegister

static bool isChainScratchRegister(Register VGPR)

Definition:SIRegisterInfo.cpp:452

llvm::SIRegisterInfo::isAGPRClass

static bool isAGPRClass(const TargetRegisterClass *RC)

Definition:SIRegisterInfo.h:223

llvm::SITargetLowering

Definition:SIISelLowering.h:31

llvm::SMDiagnostic

Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...

Definition:SourceMgr.h:281

llvm::SMLoc

Represents a location in source code.

Definition:SMLoc.h:23

llvm::SMRange

Represents a range in source code.

Definition:SMLoc.h:48

llvm::SetVector::remove

bool remove(const value_type &X)

Remove an item from the set vector.

Definition:SetVector.h:188

llvm::SetVector::insert

bool insert(const value_type &X)

Insert a new element into the SetVector.

Definition:SetVector.h:162

llvm::SmallVectorBase::size

size_t size() const

Definition:SmallVector.h:78

llvm::SmallVectorImpl

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

Definition:SmallVector.h:573

llvm::SmallVectorImpl::const_iterator

typename SuperClass::const_iterator const_iterator

Definition:SmallVector.h:578

llvm::SmallVectorTemplateCommon::end

iterator end()

Definition:SmallVector.h:269

llvm::SourceMgr::getMainFileID

unsigned getMainFileID() const

Definition:SourceMgr.h:132

llvm::SourceMgr::DK_Error

@ DK_Error

Definition:SourceMgr.h:34

llvm::SourceMgr::getMemoryBuffer

const MemoryBuffer * getMemoryBuffer(unsigned i) const

Definition:SourceMgr.h:125

llvm::StringRef

StringRef - Represent a constant reference to a string, i.e.

Definition:StringRef.h:51

llvm::StringRef::consumeInteger

bool consumeInteger(unsigned Radix, T &Result)

Parse the current string as an integer of the specified radix.

Definition:StringRef.h:499

llvm::StringRef::empty

constexpr bool empty() const

empty - Check if the string is empty.

Definition:StringRef.h:147

llvm::TargetLoweringBase::getTargetMachine

const TargetMachine & getTargetMachine() const

Definition:TargetLowering.h:364

llvm::TargetRegisterClass

Definition:TargetRegisterInfo.h:44

llvm::TargetRegisterClass::getRegisters

ArrayRef< MCPhysReg > getRegisters() const

Definition:TargetRegisterInfo.h:83

llvm::TargetRegisterInfo

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

Definition:TargetRegisterInfo.h:235

llvm::raw_string_ostream

A raw_ostream that writes to an std::string.

Definition:raw_ostream.h:661

Definition:StackSlotColoring.cpp:193

llvm::AMDGPU::isEntryFunctionCC

bool isEntryFunctionCC(CallingConv::ID CC)

Definition:AMDGPUBaseInfo.cpp:2066

llvm::AMDGPU::isChainCC

bool isChainCC(CallingConv::ID CC)

Definition:AMDGPUBaseInfo.cpp:2092

llvm::AMDGPU::getInitialPSInputAddr

unsigned getInitialPSInputAddr(const Function &F)

Definition:AMDGPUBaseInfo.cpp:2026

llvm::AMDGPU::isGraphics

bool isGraphics(CallingConv::ID cc)

Definition:AMDGPUBaseInfo.cpp:2058

llvm::ARM_MB::ST

@ ST

Definition:ARMBaseInfo.h:73

llvm::CallingConv::AMDGPU_CS

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

Definition:CallingConv.h:197

llvm::CallingConv::AMDGPU_KERNEL

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

Definition:CallingConv.h:200

llvm::CallingConv::AMDGPU_Gfx

@ AMDGPU_Gfx

Used for AMD graphics targets.

Definition:CallingConv.h:232

llvm::CallingConv::AMDGPU_HS

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

Definition:CallingConv.h:206

llvm::CallingConv::AMDGPU_GS

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

Definition:CallingConv.h:191

llvm::CallingConv::AMDGPU_PS

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

Definition:CallingConv.h:194

llvm::CallingConv::SPIR_KERNEL

@ SPIR_KERNEL

Used for SPIR kernel functions.

Definition:CallingConv.h:144

llvm::TargetStackID::SGPRSpill

@ SGPRSpill

Definition:TargetFrameLowering.h:31

llvm::TargetStackID::Default

@ Default

Definition:TargetFrameLowering.h:30

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::MCPhysReg

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...

Definition:MCRegister.h:21

llvm::make_early_inc_range

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

Definition:STLExtras.h:657

llvm::DefaultMemoryClusterDWordsLimit

constexpr unsigned DefaultMemoryClusterDWordsLimit

Definition:SIInstrInfo.h:39

llvm::toString

const char * toString(DWARFSectionKind Kind)

Definition:DWARFUnitIndex.h:67

llvm::printReg

Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

Definition:TargetRegisterInfo.cpp:107

llvm::AMDGPUFunctionArgInfo

Definition:AMDGPUArgumentUsageInfo.h:103

llvm::Align

This struct is a compact representation of a valid (non-zero power of two) alignment.

Definition:Alignment.h:39

llvm::ArgDescriptor

Definition:AMDGPUArgumentUsageInfo.h:25

llvm::ArgDescriptor::createRegister

static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)

Definition:AMDGPUArgumentUsageInfo.h:46

llvm::ArgInfo

Helper struct shared between Function Specialization and SCCP Solver.

Definition:SCCPSolver.h:41

llvm::MachineFunctionInfo

MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...

Definition:MachineFunction.h:104

llvm::PerFunctionMIParsingState

Definition:MIParser.h:165

llvm::PerFunctionMIParsingState::SM

SourceMgr * SM

Definition:MIParser.h:168

llvm::yaml::FrameIndex

A serializable representation of a reference to a stack object or fixed stack object.

Definition:MIRYamlMapping.h:417

llvm::yaml::MappingTraits

Definition:ModuleSummaryIndex.h:54

llvm::yaml::SIArgumentInfo

Definition:SIMachineFunctionInfo.h:165

llvm::yaml::SIArgumentInfo::PrivateSegmentWaveByteOffset

std::optional< SIArgument > PrivateSegmentWaveByteOffset

Definition:SIMachineFunctionInfo.h:179

llvm::yaml::SIArgumentInfo::WorkGroupIDY

std::optional< SIArgument > WorkGroupIDY

Definition:SIMachineFunctionInfo.h:175

llvm::yaml::SIArgumentInfo::FlatScratchInit

std::optional< SIArgument > FlatScratchInit

Definition:SIMachineFunctionInfo.h:171

llvm::yaml::SIArgumentInfo::DispatchPtr

std::optional< SIArgument > DispatchPtr

Definition:SIMachineFunctionInfo.h:167

llvm::yaml::SIArgumentInfo::DispatchID

std::optional< SIArgument > DispatchID

Definition:SIMachineFunctionInfo.h:170

llvm::yaml::SIArgumentInfo::WorkItemIDY

std::optional< SIArgument > WorkItemIDY

Definition:SIMachineFunctionInfo.h:185

llvm::yaml::SIArgumentInfo::WorkGroupIDX

std::optional< SIArgument > WorkGroupIDX

Definition:SIMachineFunctionInfo.h:174

llvm::yaml::SIArgumentInfo::ImplicitArgPtr

std::optional< SIArgument > ImplicitArgPtr

Definition:SIMachineFunctionInfo.h:181

llvm::yaml::SIArgumentInfo::QueuePtr

std::optional< SIArgument > QueuePtr

Definition:SIMachineFunctionInfo.h:168

llvm::yaml::SIArgumentInfo::WorkGroupInfo

std::optional< SIArgument > WorkGroupInfo

Definition:SIMachineFunctionInfo.h:177

llvm::yaml::SIArgumentInfo::LDSKernelId

std::optional< SIArgument > LDSKernelId

Definition:SIMachineFunctionInfo.h:178

llvm::yaml::SIArgumentInfo::ImplicitBufferPtr

std::optional< SIArgument > ImplicitBufferPtr

Definition:SIMachineFunctionInfo.h:182

llvm::yaml::SIArgumentInfo::WorkItemIDX

std::optional< SIArgument > WorkItemIDX

Definition:SIMachineFunctionInfo.h:184

llvm::yaml::SIArgumentInfo::KernargSegmentPtr

std::optional< SIArgument > KernargSegmentPtr

Definition:SIMachineFunctionInfo.h:169

llvm::yaml::SIArgumentInfo::WorkItemIDZ

std::optional< SIArgument > WorkItemIDZ

Definition:SIMachineFunctionInfo.h:186

llvm::yaml::SIArgumentInfo::PrivateSegmentSize

std::optional< SIArgument > PrivateSegmentSize

Definition:SIMachineFunctionInfo.h:172

llvm::yaml::SIArgumentInfo::PrivateSegmentBuffer

std::optional< SIArgument > PrivateSegmentBuffer

Definition:SIMachineFunctionInfo.h:166

llvm::yaml::SIArgumentInfo::WorkGroupIDZ

std::optional< SIArgument > WorkGroupIDZ

Definition:SIMachineFunctionInfo.h:176

llvm::yaml::SIArgument

Definition:SIMachineFunctionInfo.h:91

llvm::yaml::SIArgument::StackOffset

unsigned StackOffset

Definition:SIMachineFunctionInfo.h:95

llvm::yaml::SIArgument::Mask

std::optional< unsigned > Mask

Definition:SIMachineFunctionInfo.h:97

llvm::yaml::SIArgument::RegisterName

StringValue RegisterName

Definition:SIMachineFunctionInfo.h:94

llvm::yaml::SIArgument::createArgument

static SIArgument createArgument(bool IsReg)

Definition:SIMachineFunctionInfo.h:132

llvm::yaml::SIMachineFunctionInfo

Definition:SIMachineFunctionInfo.h:260

llvm::yaml::SIMachineFunctionInfo::MaxMemoryClusterDWords

unsigned MaxMemoryClusterDWords

Definition:SIMachineFunctionInfo.h:292

llvm::yaml::SIMachineFunctionInfo::HasSpilledVGPRs

bool HasSpilledVGPRs

Definition:SIMachineFunctionInfo.h:272

llvm::yaml::SIMachineFunctionInfo::SGPRForEXECCopy

StringValue SGPRForEXECCopy

Definition:SIMachineFunctionInfo.h:297

llvm::yaml::SIMachineFunctionInfo::HasSpilledSGPRs

bool HasSpilledSGPRs

Definition:SIMachineFunctionInfo.h:271

llvm::yaml::SIMachineFunctionInfo::ReturnsVoid

bool ReturnsVoid

Definition:SIMachineFunctionInfo.h:286

llvm::yaml::SIMachineFunctionInfo::DynLDSAlign

Align DynLDSAlign

Definition:SIMachineFunctionInfo.h:265

llvm::yaml::SIMachineFunctionInfo::WWMReservedRegs

SmallVector< StringValue > WWMReservedRegs

Definition:SIMachineFunctionInfo.h:279

llvm::yaml::SIMachineFunctionInfo::HighBitsOf32BitAddress

uint32_t HighBitsOf32BitAddress

Definition:SIMachineFunctionInfo.h:273

llvm::yaml::SIMachineFunctionInfo::GDSSize

uint32_t GDSSize

Definition:SIMachineFunctionInfo.h:264

llvm::yaml::SIMachineFunctionInfo::MemoryBound

bool MemoryBound

Definition:SIMachineFunctionInfo.h:269

llvm::yaml::SIMachineFunctionInfo::Occupancy

unsigned Occupancy

Definition:SIMachineFunctionInfo.h:276

llvm::yaml::SIMachineFunctionInfo::PSInputEnable

unsigned PSInputEnable

Definition:SIMachineFunctionInfo.h:291

llvm::yaml::SIMachineFunctionInfo::MaxKernArgAlign

Align MaxKernArgAlign

Definition:SIMachineFunctionInfo.h:262

llvm::yaml::SIMachineFunctionInfo::SIMachineFunctionInfo

SIMachineFunctionInfo()=default

llvm::yaml::SIMachineFunctionInfo::LongBranchReservedReg

StringValue LongBranchReservedReg

Definition:SIMachineFunctionInfo.h:298

llvm::yaml::SIMachineFunctionInfo::ExplicitKernArgSize

uint64_t ExplicitKernArgSize

Definition:SIMachineFunctionInfo.h:261

llvm::yaml::SIMachineFunctionInfo::LDSSize

uint32_t LDSSize

Definition:SIMachineFunctionInfo.h:263

llvm::yaml::SIMachineFunctionInfo::WaveLimiter

bool WaveLimiter

Definition:SIMachineFunctionInfo.h:270

llvm::yaml::SIMachineFunctionInfo::PSInputAddr

unsigned PSInputAddr

Definition:SIMachineFunctionInfo.h:290

llvm::yaml::SIMachineFunctionInfo::mappingImpl

void mappingImpl(yaml::IO &YamlIO) override

Definition:SIMachineFunctionInfo.cpp:737

llvm::yaml::SIMachineFunctionInfo::NoSignedZerosFPMath

bool NoSignedZerosFPMath

Definition:SIMachineFunctionInfo.h:268

llvm::yaml::SIMachineFunctionInfo::IsEntryFunction

bool IsEntryFunction

Definition:SIMachineFunctionInfo.h:266

llvm::yaml::SIMachineFunctionInfo::VGPRForAGPRCopy

StringValue VGPRForAGPRCopy

Definition:SIMachineFunctionInfo.h:296

llvm::yaml::SIMachineFunctionInfo::SpillPhysVGPRS

SmallVector< StringValue, 2 > SpillPhysVGPRS

Definition:SIMachineFunctionInfo.h:278

llvm::yaml::SIMachineFunctionInfo::ScavengeFI

std::optional< FrameIndex > ScavengeFI

Definition:SIMachineFunctionInfo.h:295

llvm::yaml::SIMachineFunctionInfo::BytesInStackArgArea

unsigned BytesInStackArgArea

Definition:SIMachineFunctionInfo.h:285

llvm::yaml::StringValue

A wrapper around std::string which contains a source range that's being set during parsing.

Definition:MIRYamlMapping.h:34

llvm::yaml::StringValue::Value

std::string Value

Definition:MIRYamlMapping.h:35