Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SIMachineFunctionInfo.h"
10#include "AMDGPUSubtarget.h"
11#include "GCNSubtarget.h"
12#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13#include "SIRegisterInfo.h"
14#include "Utils/AMDGPUBaseInfo.h"
15#include "llvm/CodeGen/LiveIntervals.h"
16#include "llvm/CodeGen/MIRParser/MIParser.h"
17#include "llvm/CodeGen/MachineBasicBlock.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/IR/CallingConv.h"
22#include "llvm/IR/DiagnosticInfo.h"
23#include "llvm/IR/Function.h"
24#include <cassert>
25#include <optional>
26#include <vector>
27
// NOTE(review): presumably the maximum number of lanes in a wavefront. No use
// of this constant is visible in this file -- confirm it is still needed.
28enum {MAX_LANES = 64 };
29
30using namespacellvm;
31
32constGCNTargetMachine &getTM(constGCNSubtarget *STI) {
33constSITargetLowering *TLI = STI->getTargetLowering();
34returnstatic_cast<constGCNTargetMachine &>(TLI->getTargetMachine());
35}
36
/// Builds the per-function info for IR function \p F on subtarget \p STI.
///
/// The body records work-group size / waves-per-EU / occupancy limits, decides
/// which work-group and work-item ID inputs are enabled (driven by the calling
/// convention and the "amdgpu-no-*" function attributes), picks the registers
/// used for the stack pointer, frame offset and scratch resource, and parses a
/// few integer-valued function attributes.
37SIMachineFunctionInfo::SIMachineFunctionInfo(constFunction &F,
38constGCNSubtarget *STI)
39 :AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
40 UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
41 WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
42 PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
43 WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
44 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
// STI is already declared as a const GCNSubtarget pointer, so this cast only
// rebinds it as a reference.
45constGCNSubtarget &ST = *static_cast<constGCNSubtarget *>(STI);
46 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
47 WavesPerEU = ST.getWavesPerEU(F);
48 MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
49assert(MaxNumWorkGroups.size() == 3);
50
51 Occupancy = ST.computeOccupancy(F,getLDSSize()).second;
52CallingConv::IDCC =F.getCallingConv();
53
54 VRegFlags.reserve(1024);
55
56constbool IsKernel =CC ==CallingConv::AMDGPU_KERNEL ||
57CC ==CallingConv::SPIR_KERNEL;
58
// Kernels always enable the X work-group and work-item IDs; pixel shaders
// instead record their initial PS input address.
59if (IsKernel) {
60 WorkGroupIDX =true;
61 WorkItemIDX =true;
62 }elseif (CC ==CallingConv::AMDGPU_PS) {
63 PSInputAddr =AMDGPU::getInitialPSInputAddr(F);
64 }
65
66 MayNeedAGPRs = ST.hasMAIInsts();
67
// Three mutually exclusive cases follow: chain calling conventions, other
// non-entry functions, and entry functions.
68if (AMDGPU::isChainCC(CC)) {
69// Chain functions don't receive an SP from their caller, but are free to
70// set one up. For now, we can use s32 to match what amdgpu_gfx functions
71// would use if called, but this can be revisited.
72// FIXME: Only reserve this if we actually need it.
73 StackPtrOffsetReg = AMDGPU::SGPR32;
74
75 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
76
77ArgInfo.PrivateSegmentBuffer =
78ArgDescriptor::createRegister(ScratchRSrcReg);
79
80 ImplicitArgPtr =false;
81 }elseif (!isEntryFunction()) {
82if (CC !=CallingConv::AMDGPU_Gfx)
83ArgInfo =AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
84
85 FrameOffsetReg = AMDGPU::SGPR33;
86 StackPtrOffsetReg = AMDGPU::SGPR32;
87
88if (!ST.enableFlatScratch()) {
89// Non-entry functions have no special inputs for now, other registers
90// required for scratch access.
91 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
92
93ArgInfo.PrivateSegmentBuffer =
94ArgDescriptor::createRegister(ScratchRSrcReg);
95 }
96
97if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
98 ImplicitArgPtr =true;
99 }else {
100 ImplicitArgPtr =false;
101MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
102MaxKernArgAlign);
103
104if (ST.hasGFX90AInsts() &&
105 ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
106 !mayUseAGPRs(F))
107 MayNeedAGPRs =false;// We will select all MAI with VGPR operands.
108 }
109
// Work-group IDs: enabled unless the matching "amdgpu-no-workgroup-id-*"
// attribute is present. Applies to non-graphics conventions, and to
// AMDGPU_CS / AMDGPU_Gfx when the subtarget has architected SGPRs.
110if (!AMDGPU::isGraphics(CC) ||
111 ((CC ==CallingConv::AMDGPU_CS ||CC ==CallingConv::AMDGPU_Gfx) &&
112 ST.hasArchitectedSGPRs())) {
113if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
114 WorkGroupIDX =true;
115
116if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
117 WorkGroupIDY =true;
118
119if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
120 WorkGroupIDZ =true;
121 }
122
// Work-item IDs and the LDS kernel id are only considered for non-graphics
// calling conventions. Y and Z are additionally skipped when the subtarget
// reports a maximum work-item ID of 0 for that dimension.
123if (!AMDGPU::isGraphics(CC)) {
124if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
125 WorkItemIDX =true;
126
127if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
128 ST.getMaxWorkitemID(F, 1) != 0)
129 WorkItemIDY =true;
130
131if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
132 ST.getMaxWorkitemID(F, 2) != 0)
133 WorkItemIDZ =true;
134
135if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
136 LDSKernelId =true;
137 }
138
139if (isEntryFunction()) {
140// X, XY, and XYZ are the only supported combinations, so make sure Y is
141// enabled if Z is.
142if (WorkItemIDZ)
143 WorkItemIDY =true;
144
145if (!ST.flatScratchIsArchitected()) {
146 PrivateSegmentWaveByteOffset =true;
147
148// HS and GS always have the scratch wave offset in SGPR5 on GFX9.
149if (ST.getGeneration() >=AMDGPUSubtarget::GFX9 &&
150 (CC ==CallingConv::AMDGPU_HS ||CC ==CallingConv::AMDGPU_GS))
151ArgInfo.PrivateSegmentWaveByteOffset =
152ArgDescriptor::createRegister(AMDGPU::SGPR5);
153 }
154 }
155
// Optional overrides read from string-valued function attributes. When the
// attribute string is empty the value from the initializer list is kept. The
// result of consumeInteger is not checked here.
156AttributeA =F.getFnAttribute("amdgpu-git-ptr-high");
157StringRef S =A.getValueAsString();
158if (!S.empty())
159 S.consumeInteger(0, GITPtrHigh);
160
161A =F.getFnAttribute("amdgpu-32bit-address-high-bits");
162 S =A.getValueAsString();
163if (!S.empty())
164 S.consumeInteger(0, HighBitsOf32BitAddress);
165
166 MaxMemoryClusterDWords =F.getFnAttributeAsParsedInteger(
167"amdgpu-max-memory-cluster-dwords",DefaultMemoryClusterDWordsLimit);
168
169// On GFX908, in order to guarantee copying between AGPRs, we need a scratch
170// VGPR available at all times. For now, reserve highest available VGPR. After
171// RA, shift it to the lowest available unused VGPR if one exists.
172if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
173 VGPRForAGPRCopy =
174 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
175 }
176}
177
178MachineFunctionInfo *SIMachineFunctionInfo::clone(
179BumpPtrAllocator &Allocator,MachineFunction &DestMF,
180constDenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
181 const{
182return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
183}
184
185voidSIMachineFunctionInfo::limitOccupancy(constMachineFunction &MF) {
186limitOccupancy(getMaxWavesPerEU());
187constGCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
188limitOccupancy(ST.getOccupancyWithWorkGroupSizes(MF).second);
189}
190
191RegisterSIMachineFunctionInfo::addPrivateSegmentBuffer(
192constSIRegisterInfo &TRI) {
193ArgInfo.PrivateSegmentBuffer =
194ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
195 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
196 NumUserSGPRs += 4;
197returnArgInfo.PrivateSegmentBuffer.getRegister();
198}
199
200RegisterSIMachineFunctionInfo::addDispatchPtr(constSIRegisterInfo &TRI) {
201ArgInfo.DispatchPtr =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
202 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
203 NumUserSGPRs += 2;
204returnArgInfo.DispatchPtr.getRegister();
205}
206
207RegisterSIMachineFunctionInfo::addQueuePtr(constSIRegisterInfo &TRI) {
208ArgInfo.QueuePtr =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
209 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
210 NumUserSGPRs += 2;
211returnArgInfo.QueuePtr.getRegister();
212}
213
214RegisterSIMachineFunctionInfo::addKernargSegmentPtr(constSIRegisterInfo &TRI) {
215ArgInfo.KernargSegmentPtr
216 =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
217 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
218 NumUserSGPRs += 2;
219returnArgInfo.KernargSegmentPtr.getRegister();
220}
221
222RegisterSIMachineFunctionInfo::addDispatchID(constSIRegisterInfo &TRI) {
223ArgInfo.DispatchID =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
224 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
225 NumUserSGPRs += 2;
226returnArgInfo.DispatchID.getRegister();
227}
228
229RegisterSIMachineFunctionInfo::addFlatScratchInit(constSIRegisterInfo &TRI) {
230ArgInfo.FlatScratchInit =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
231 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
232 NumUserSGPRs += 2;
233returnArgInfo.FlatScratchInit.getRegister();
234}
235
236RegisterSIMachineFunctionInfo::addPrivateSegmentSize(constSIRegisterInfo &TRI) {
237ArgInfo.PrivateSegmentSize =ArgDescriptor::createRegister(getNextUserSGPR());
238 NumUserSGPRs += 1;
239returnArgInfo.PrivateSegmentSize.getRegister();
240}
241
242RegisterSIMachineFunctionInfo::addImplicitBufferPtr(constSIRegisterInfo &TRI) {
243ArgInfo.ImplicitBufferPtr =ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
244 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
245 NumUserSGPRs += 2;
246returnArgInfo.ImplicitBufferPtr.getRegister();
247}
248
249RegisterSIMachineFunctionInfo::addLDSKernelId() {
250ArgInfo.LDSKernelId =ArgDescriptor::createRegister(getNextUserSGPR());
251 NumUserSGPRs += 1;
252returnArgInfo.LDSKernelId.getRegister();
253}
254
255SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
256constSIRegisterInfo &TRI,constTargetRegisterClass *RC,
257unsigned AllocSizeDWord,int KernArgIdx,int PaddingSGPRs) {
258assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
259"Preload kernel argument allocated twice.");
260 NumUserSGPRs += PaddingSGPRs;
261// If the available register tuples are aligned with the kernarg to be
262// preloaded use that register, otherwise we need to use a set of SGPRs and
263// merge them.
264if (!ArgInfo.FirstKernArgPreloadReg)
265ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
266Register PreloadReg =
267TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
268if (PreloadReg &&
269 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
270ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
271 NumUserSGPRs += AllocSizeDWord;
272 }else {
273for (unsignedI = 0;I < AllocSizeDWord; ++I) {
274ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
275 NumUserSGPRs++;
276 }
277 }
278
279// Track the actual number of SGPRs that HW will preload to.
280 UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
281return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
282}
283
284voidSIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF,Register VGPR,
285uint64_tSize,Align Alignment) {
286// Skip if it is an entry function or the register is already added.
287if (isEntryFunction() || WWMSpills.count(VGPR))
288return;
289
290// Skip if this is a function with the amdgpu_cs_chain or
291// amdgpu_cs_chain_preserve calling convention and this is a scratch register.
292// We never need to allocate a spill for these because we don't even need to
293// restore the inactive lanes for them (they're scratchier than the usual
294// scratch registers). We only need to do this if we have calls to
295// llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since
296// chain functions do not return) and the function did not contain a call to
297// llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes
298// when entering the function).
299if (isChainFunction() &&
300 (SIRegisterInfo::isChainScratchRegister(VGPR) ||
301 !MF.getFrameInfo().hasTailCall() ||hasInitWholeWave()))
302return;
303
304 WWMSpills.insert(std::make_pair(
305 VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
306}
307
308// Separate out the callee-saved and scratch registers.
309voidSIMachineFunctionInfo::splitWWMSpillRegisters(
310MachineFunction &MF,
311SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
312SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const{
313constMCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
314for (auto &Reg : WWMSpills) {
315if (isCalleeSavedReg(CSRegs, Reg.first))
316 CalleeSavedRegs.push_back(Reg);
317else
318 ScratchRegs.push_back(Reg);
319 }
320}
321
322boolSIMachineFunctionInfo::isCalleeSavedReg(constMCPhysReg *CSRegs,
323MCPhysReg Reg) const{
324for (unsignedI = 0; CSRegs[I]; ++I) {
325if (CSRegs[I] == Reg)
326returntrue;
327 }
328
329returnfalse;
330}
331
332voidSIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
333MachineFunction &MF,SmallVectorImpl<Register> &WWMVGPRs,
334BitVector &SavedVGPRs) {
335constSIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
336MachineRegisterInfo &MRI = MF.getRegInfo();
337for (unsignedI = 0, E = WWMVGPRs.size();I < E; ++I) {
338Register Reg = WWMVGPRs[I];
339Register NewReg =
340TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
341if (!NewReg || NewReg >= Reg)
342break;
343
344MRI.replaceRegWith(Reg, NewReg);
345
346// Update various tables with the new VGPR.
347 WWMVGPRs[I] = NewReg;
348 WWMReservedRegs.remove(Reg);
349 WWMReservedRegs.insert(NewReg);
350MRI.reserveReg(NewReg,TRI);
351
352// Replace the register in SpillPhysVGPRs. This is needed to look for free
353// lanes while spilling special SGPRs like FP, BP, etc. during PEI.
354auto *RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);
355if (RegItr != SpillPhysVGPRs.end()) {
356unsignedIdx = std::distance(SpillPhysVGPRs.begin(), RegItr);
357 SpillPhysVGPRs[Idx] = NewReg;
358 }
359
360// The generic `determineCalleeSaves` might have set the old register if it
361// is in the CSR range.
362 SavedVGPRs.reset(Reg);
363
364for (MachineBasicBlock &MBB : MF) {
365MBB.removeLiveIn(Reg);
366MBB.sortUniqueLiveIns();
367 }
368
369 Reg = NewReg;
370 }
371}
372
373bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
374MachineFunction &MF,int FI,unsigned LaneIndex) {
375MachineRegisterInfo &MRI = MF.getRegInfo();
376Register LaneVGPR;
377if (!LaneIndex) {
378 LaneVGPR =MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
379 SpillVGPRs.push_back(LaneVGPR);
380 }else {
381 LaneVGPR = SpillVGPRs.back();
382 }
383
384 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
385returntrue;
386}
387
388bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
389MachineFunction &MF,int FI,unsigned LaneIndex,bool IsPrologEpilog) {
390constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
391constSIRegisterInfo *TRI =ST.getRegisterInfo();
392MachineRegisterInfo &MRI = MF.getRegInfo();
393Register LaneVGPR;
394if (!LaneIndex) {
395// Find the highest available register if called before RA to ensure the
396// lowest registers are available for allocation. The LaneVGPR, in that
397// case, will be shifted back to the lowest range after VGPR allocation.
398 LaneVGPR =TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
399 !IsPrologEpilog);
400if (LaneVGPR == AMDGPU::NoRegister) {
401// We have no VGPRs left for spilling SGPRs. Reset because we will not
402// partially spill the SGPR to VGPRs.
403 SGPRSpillsToPhysicalVGPRLanes.erase(FI);
404returnfalse;
405 }
406
407if (IsPrologEpilog)
408allocateWWMSpill(MF, LaneVGPR);
409
410reserveWWMRegister(LaneVGPR);
411for (MachineBasicBlock &MBB : MF) {
412MBB.addLiveIn(LaneVGPR);
413MBB.sortUniqueLiveIns();
414 }
415 SpillPhysVGPRs.push_back(LaneVGPR);
416 }else {
417 LaneVGPR = SpillPhysVGPRs.back();
418 }
419
420 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
421returntrue;
422}
423
424boolSIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
425MachineFunction &MF,int FI,bool SpillToPhysVGPRLane,
426bool IsPrologEpilog) {
427 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
428 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
429 : SGPRSpillsToVirtualVGPRLanes[FI];
430
431// This has already been allocated.
432if (!SpillLanes.empty())
433returntrue;
434
435constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
436MachineFrameInfo &FrameInfo = MF.getFrameInfo();
437unsigned WaveSize = ST.getWavefrontSize();
438
439unsignedSize = FrameInfo.getObjectSize(FI);
440unsigned NumLanes =Size / 4;
441
442if (NumLanes > WaveSize)
443returnfalse;
444
445assert(Size >= 4 &&"invalid sgpr spill size");
446assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
447"not spilling SGPRs to VGPRs");
448
449unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
450 : NumVirtualVGPRSpillLanes;
451
452for (unsignedI = 0;I < NumLanes; ++I, ++NumSpillLanes) {
453unsigned LaneIndex = (NumSpillLanes % WaveSize);
454
455bool Allocated = SpillToPhysVGPRLane
456 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
457 IsPrologEpilog)
458 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
459if (!Allocated) {
460 NumSpillLanes -=I;
461returnfalse;
462 }
463 }
464
465returntrue;
466}
467
468/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
469/// Either AGPR is spilled to VGPR to vice versa.
470/// Returns true if a \p FI can be eliminated completely.
471boolSIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
472int FI,
473bool isAGPRtoVGPR) {
474MachineRegisterInfo &MRI = MF.getRegInfo();
475MachineFrameInfo &FrameInfo = MF.getFrameInfo();
476constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
477
478assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
479
480auto &Spill = VGPRToAGPRSpills[FI];
481
482// This has already been allocated.
483if (!Spill.Lanes.empty())
484return Spill.FullyAllocated;
485
486unsignedSize = FrameInfo.getObjectSize(FI);
487unsigned NumLanes =Size / 4;
488 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
489
490constTargetRegisterClass &RC =
491 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
492auto Regs = RC.getRegisters();
493
494auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
495constSIRegisterInfo *TRI = ST.getRegisterInfo();
496 Spill.FullyAllocated =true;
497
498// FIXME: Move allocation logic out of MachineFunctionInfo and initialize
499// once.
500BitVector OtherUsedRegs;
501 OtherUsedRegs.resize(TRI->getNumRegs());
502
503constuint32_t *CSRMask =
504TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
505if (CSRMask)
506 OtherUsedRegs.setBitsInMask(CSRMask);
507
508// TODO: Should include register tuples, but doesn't matter with current
509// usage.
510for (MCPhysReg Reg : SpillAGPR)
511 OtherUsedRegs.set(Reg);
512for (MCPhysReg Reg : SpillVGPR)
513 OtherUsedRegs.set(Reg);
514
515SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
516for (intI = NumLanes - 1;I >= 0; --I) {
517 NextSpillReg = std::find_if(
518 NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
519 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
520 !OtherUsedRegs[Reg];
521 });
522
523if (NextSpillReg == Regs.end()) {// Registers exhausted
524 Spill.FullyAllocated =false;
525break;
526 }
527
528 OtherUsedRegs.set(*NextSpillReg);
529 SpillRegs.push_back(*NextSpillReg);
530MRI.reserveReg(*NextSpillReg,TRI);
531 Spill.Lanes[I] = *NextSpillReg++;
532 }
533
534return Spill.FullyAllocated;
535}
536
537boolSIMachineFunctionInfo::removeDeadFrameIndices(
538MachineFrameInfo &MFI,bool ResetSGPRSpillStackIDs) {
539// Remove dead frame indices from function frame, however keep FP & BP since
540// spills for them haven't been inserted yet. And also make sure to remove the
541// frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,
542// otherwise, it could result in an unexpected side effect and bug, in case of
543// any re-mapping of freed frame indices by later pass(es) like "stack slot
544// coloring".
545for (auto &R :make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
546 MFI.RemoveStackObject(R.first);
547 SGPRSpillsToVirtualVGPRLanes.erase(R.first);
548 }
549
550// Remove the dead frame indices of CSR SGPRs which are spilled to physical
551// VGPR lanes during SILowerSGPRSpills pass.
552if (!ResetSGPRSpillStackIDs) {
553for (auto &R :make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
554 MFI.RemoveStackObject(R.first);
555 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
556 }
557 }
558bool HaveSGPRToMemory =false;
559
560if (ResetSGPRSpillStackIDs) {
561// All other SGPRs must be allocated on the default stack, so reset the
562// stack ID.
563for (intI = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();I != E;
564 ++I) {
565if (!checkIndexInPrologEpilogSGPRSpills(I)) {
566if (MFI.getStackID(I) ==TargetStackID::SGPRSpill) {
567 MFI.setStackID(I,TargetStackID::Default);
568 HaveSGPRToMemory =true;
569 }
570 }
571 }
572 }
573
574for (auto &R : VGPRToAGPRSpills) {
575if (R.second.IsDead)
576 MFI.RemoveStackObject(R.first);
577 }
578
579return HaveSGPRToMemory;
580}
581
582intSIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
583constSIRegisterInfo &TRI) {
584if (ScavengeFI)
585return *ScavengeFI;
586
587 ScavengeFI =
588 MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
589TRI.getSpillAlign(AMDGPU::SGPR_32RegClass),false);
590return *ScavengeFI;
591}
592
593MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const{
594assert(NumSystemSGPRs == 0 &&"System SGPRs must be added after user SGPRs");
595return AMDGPU::SGPR0 + NumUserSGPRs;
596}
597
598MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const{
599return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
600}
601
602void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
603 VRegFlags.grow(Reg);
604}
605
606void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
607Register SrcReg) {
608 VRegFlags.grow(NewReg);
609 VRegFlags[NewReg] = VRegFlags[SrcReg];
610}
611
612Register
613SIMachineFunctionInfo::getGITPtrLoReg(constMachineFunction &MF) const{
614constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
615if (!ST.isAmdPalOS())
616returnRegister();
617Register GitPtrLo = AMDGPU::SGPR0;// Low GIT address passed in
618if (ST.hasMergedShaders()) {
619switch (MF.getFunction().getCallingConv()) {
620caseCallingConv::AMDGPU_HS:
621caseCallingConv::AMDGPU_GS:
622// Low GIT address is passed in s8 rather than s0 for an LS+HS or
623// ES+GS merged shader on gfx9+.
624 GitPtrLo = AMDGPU::SGPR8;
625return GitPtrLo;
626default:
627return GitPtrLo;
628 }
629 }
630return GitPtrLo;
631}
632
633staticyaml::StringValueregToString(Register Reg,
634constTargetRegisterInfo &TRI) {
635yaml::StringValue Dest;
636 {
637raw_string_ostreamOS(Dest.Value);
638OS <<printReg(Reg, &TRI);
639 }
640return Dest;
641}
642
643static std::optional<yaml::SIArgumentInfo>
644convertArgumentInfo(constAMDGPUFunctionArgInfo &ArgInfo,
645constTargetRegisterInfo &TRI) {
646yaml::SIArgumentInfo AI;
647
648auto convertArg = [&](std::optional<yaml::SIArgument> &A,
649constArgDescriptor &Arg) {
650if (!Arg)
651returnfalse;
652
653// Create a register or stack argument.
654yaml::SIArgument SA =yaml::SIArgument::createArgument(Arg.isRegister());
655if (Arg.isRegister()) {
656raw_string_ostreamOS(SA.RegisterName.Value);
657OS <<printReg(Arg.getRegister(), &TRI);
658 }else
659 SA.StackOffset = Arg.getStackOffset();
660// Check and update the optional mask.
661if (Arg.isMasked())
662 SA.Mask = Arg.getMask();
663
664A = SA;
665returntrue;
666 };
667
668// TODO: Need to serialize kernarg preloads.
669boolAny =false;
670Any |= convertArg(AI.PrivateSegmentBuffer,ArgInfo.PrivateSegmentBuffer);
671Any |= convertArg(AI.DispatchPtr,ArgInfo.DispatchPtr);
672Any |= convertArg(AI.QueuePtr,ArgInfo.QueuePtr);
673Any |= convertArg(AI.KernargSegmentPtr,ArgInfo.KernargSegmentPtr);
674Any |= convertArg(AI.DispatchID,ArgInfo.DispatchID);
675Any |= convertArg(AI.FlatScratchInit,ArgInfo.FlatScratchInit);
676Any |= convertArg(AI.LDSKernelId,ArgInfo.LDSKernelId);
677Any |= convertArg(AI.PrivateSegmentSize,ArgInfo.PrivateSegmentSize);
678Any |= convertArg(AI.WorkGroupIDX,ArgInfo.WorkGroupIDX);
679Any |= convertArg(AI.WorkGroupIDY,ArgInfo.WorkGroupIDY);
680Any |= convertArg(AI.WorkGroupIDZ,ArgInfo.WorkGroupIDZ);
681Any |= convertArg(AI.WorkGroupInfo,ArgInfo.WorkGroupInfo);
682Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
683ArgInfo.PrivateSegmentWaveByteOffset);
684Any |= convertArg(AI.ImplicitArgPtr,ArgInfo.ImplicitArgPtr);
685Any |= convertArg(AI.ImplicitBufferPtr,ArgInfo.ImplicitBufferPtr);
686Any |= convertArg(AI.WorkItemIDX,ArgInfo.WorkItemIDX);
687Any |= convertArg(AI.WorkItemIDY,ArgInfo.WorkItemIDY);
688Any |= convertArg(AI.WorkItemIDZ,ArgInfo.WorkItemIDZ);
689
690if (Any)
691return AI;
692
693return std::nullopt;
694}
695
696yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
697constllvm::SIMachineFunctionInfo &MFI,constTargetRegisterInfo &TRI,
698constllvm::MachineFunction &MF)
699 : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
700 MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
701 GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()),
702 IsEntryFunction(MFI.isEntryFunction()),
703 NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
704 MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
705 HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
706 HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
707 HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
708 Occupancy(MFI.getOccupancy()),
709 ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(),TRI)),
710 FrameOffsetReg(regToString(MFI.getFrameOffsetReg(),TRI)),
711 StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(),TRI)),
712 BytesInStackArgArea(MFI.getBytesInStackArgArea()),
713 ReturnsVoid(MFI.returnsVoid()),
714ArgInfo(convertArgumentInfo(MFI.getArgInfo(),TRI)),
715 PSInputAddr(MFI.getPSInputAddr()), PSInputEnable(MFI.getPSInputEnable()),
716 MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()),
717 Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()) {
718for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
719SpillPhysVGPRS.push_back(regToString(Reg,TRI));
720
721for (Register Reg : MFI.getWWMReservedRegs())
722WWMReservedRegs.push_back(regToString(Reg,TRI));
723
724if (MFI.getLongBranchReservedReg())
725LongBranchReservedReg =regToString(MFI.getLongBranchReservedReg(),TRI);
726if (MFI.getVGPRForAGPRCopy())
727VGPRForAGPRCopy =regToString(MFI.getVGPRForAGPRCopy(),TRI);
728
729if (MFI.getSGPRForEXECCopy())
730SGPRForEXECCopy =regToString(MFI.getSGPRForEXECCopy(),TRI);
731
732auto SFI = MFI.getOptionalScavengeFI();
733if (SFI)
734ScavengeFI =yaml::FrameIndex(*SFI, MF.getFrameInfo());
735}
736
737voidyaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
738MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
739}
740
741boolSIMachineFunctionInfo::initializeBaseYamlFields(
742constyaml::SIMachineFunctionInfo &YamlMFI,constMachineFunction &MF,
743PerFunctionMIParsingState &PFS,SMDiagnostic &Error,SMRange &SourceRange) {
744ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
745MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
746LDSSize = YamlMFI.LDSSize;
747GDSSize = YamlMFI.GDSSize;
748DynLDSAlign = YamlMFI.DynLDSAlign;
749PSInputAddr = YamlMFI.PSInputAddr;
750PSInputEnable = YamlMFI.PSInputEnable;
751MaxMemoryClusterDWords = YamlMFI.MaxMemoryClusterDWords;
752HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
753Occupancy = YamlMFI.Occupancy;
754IsEntryFunction = YamlMFI.IsEntryFunction;
755NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
756MemoryBound = YamlMFI.MemoryBound;
757WaveLimiter = YamlMFI.WaveLimiter;
758HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
759HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
760BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
761ReturnsVoid = YamlMFI.ReturnsVoid;
762
763if (YamlMFI.ScavengeFI) {
764auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
765if (!FIOrErr) {
766// Create a diagnostic for a the frame index.
767constMemoryBuffer &Buffer =
768 *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
769
770Error =SMDiagnostic(*PFS.SM,SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
771SourceMgr::DK_Error,toString(FIOrErr.takeError()),
772"", {}, {});
773 SourceRange = YamlMFI.ScavengeFI->SourceRange;
774returntrue;
775 }
776ScavengeFI = *FIOrErr;
777 }else {
778ScavengeFI = std::nullopt;
779 }
780returnfalse;
781}
782
783boolSIMachineFunctionInfo::mayUseAGPRs(constFunction &F) const{
784return !F.hasFnAttribute("amdgpu-no-agpr");
785}
786
787boolSIMachineFunctionInfo::usesAGPRs(constMachineFunction &MF) const{
788if (UsesAGPRs)
789return *UsesAGPRs;
790
791if (!mayNeedAGPRs()) {
792 UsesAGPRs =false;
793returnfalse;
794 }
795
796if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
797 MF.getFrameInfo().hasCalls()) {
798 UsesAGPRs =true;
799returntrue;
800 }
801
802constMachineRegisterInfo &MRI = MF.getRegInfo();
803
804for (unsignedI = 0, E =MRI.getNumVirtRegs();I != E; ++I) {
805constRegister Reg =Register::index2VirtReg(I);
806constTargetRegisterClass *RC =MRI.getRegClassOrNull(Reg);
807if (RC &&SIRegisterInfo::isAGPRClass(RC)) {
808 UsesAGPRs =true;
809returntrue;
810 }
811if (!RC && !MRI.use_empty(Reg) &&MRI.getType(Reg).isValid()) {
812// Defer caching UsesAGPRs, function might not yet been regbank selected.
813returntrue;
814 }
815 }
816
817for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
818if (MRI.isPhysRegUsed(Reg)) {
819 UsesAGPRs =true;
820returntrue;
821 }
822 }
823
824 UsesAGPRs =false;
825returnfalse;
826}
MRI
unsigned const MachineRegisterInfo * MRI
Definition:AArch64AdvSIMDScalarPass.cpp:105
AMDGPUBaseInfo.h
AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.
AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.
MBB
MachineBasicBlock & MBB
Definition:ARMSLSHardening.cpp:71
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
CallingConv.h
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
DiagnosticInfo.h
Size
uint64_t Size
Definition:ELFObjHandler.cpp:81
YamlIO
IO & YamlIO
Definition:ELFYAML.cpp:1314
GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.
Function.h
LiveIntervals.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
MIParser.h
MachineBasicBlock.h
MachineFrameInfo.h
MachineFunction.h
MachineRegisterInfo.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition:MachineSink.cpp:2029
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MAX_LANES
@ MAX_LANES
Definition:SIMachineFunctionInfo.cpp:28
getTM
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
Definition:SIMachineFunctionInfo.cpp:32
convertArgumentInfo
static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:644
regToString
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:633
SIMachineFunctionInfo.h
SIRegisterInfo.h
Interface definition for SIRegisterInfo.
OS
raw_pwrite_stream & OS
Definition:SampleProfWriter.cpp:51
llvm::AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
Definition:AMDGPUArgumentUsageInfo.h:182
llvm::AMDGPUMachineFunction
Definition:AMDGPUMachineFunction.h:24
llvm::AMDGPUMachineFunction::getLDSSize
uint32_t getLDSSize() const
Definition:AMDGPUMachineFunction.h:81
llvm::AMDGPUMachineFunction::MaxKernArgAlign
Align MaxKernArgAlign
Definition:AMDGPUMachineFunction.h:31
llvm::AMDGPUMachineFunction::isChainFunction
bool isChainFunction() const
Definition:AMDGPUMachineFunction.h:95
llvm::AMDGPUMachineFunction::hasInitWholeWave
bool hasInitWholeWave() const
Definition:AMDGPUMachineFunction.h:114
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition:AMDGPUMachineFunction.h:89
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition:AMDGPUSubtarget.h:40
llvm::Any
Definition:Any.h:28
llvm::Attribute
Definition:Attributes.h:67
llvm::BitVector
Definition:BitVector.h:82
llvm::BitVector::reset
BitVector & reset()
Definition:BitVector.h:392
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition:BitVector.h:341
llvm::BitVector::set
BitVector & set()
Definition:BitVector.h:351
llvm::BitVector::setBitsInMask
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
Definition:BitVector.h:707
llvm::BitVector::push_back
void push_back(bool Val)
Definition:BitVector.h:466
llvm::BumpPtrAllocatorImpl
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition:Allocator.h:66
llvm::DenseMap
Definition:DenseMap.h:727
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition:Error.h:160
llvm::Function
Definition:Function.h:63
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition:Function.h:277
llvm::GCNSubtarget
Definition:GCNSubtarget.h:34
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition:GCNSubtarget.h:287
llvm::GCNTargetMachine
Definition:AMDGPUTargetMachine.h:80
llvm::GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
Definition:GCNSubtarget.cpp:668
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition:MCRegister.h:33
llvm::MachineBasicBlock
Definition:MachineBasicBlock.h:125
llvm::MachineBasicBlock::removeLiveIn
void removeLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
Definition:MachineBasicBlock.cpp:600
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition:MachineBasicBlock.cpp:624
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition:MachineBasicBlock.h:456
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition:MachineFrameInfo.h:106
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition:MachineFrameInfo.cpp:51
llvm::MachineFrameInfo::hasCalls
bool hasCalls() const
Return true if the current function has any function calls.
Definition:MachineFrameInfo.h:621
llvm::MachineFrameInfo::CreateSpillStackObject
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Definition:MachineFrameInfo.cpp:66
llvm::MachineFrameInfo::setStackID
void setStackID(int ObjectIdx, uint8_t ID)
Definition:MachineFrameInfo.h:755
llvm::MachineFrameInfo::hasTailCall
bool hasTailCall() const
Returns true if the function contains a tail call.
Definition:MachineFrameInfo.h:646
llvm::MachineFrameInfo::isSpillSlotObjectIndex
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
Definition:MachineFrameInfo.h:737
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition:MachineFrameInfo.h:472
llvm::MachineFrameInfo::RemoveStackObject
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Definition:MachineFrameInfo.h:795
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition:MachineFrameInfo.h:412
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition:MachineFrameInfo.h:750
llvm::MachineFrameInfo::getObjectIndexBegin
int getObjectIndexBegin() const
Return the minimum frame object index.
Definition:MachineFrameInfo.h:409
llvm::MachineFunction
Definition:MachineFunction.h:267
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition:MachineFunction.h:733
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition:MachineFunction.h:749
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition:MachineFunction.h:743
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition:MachineFunction.h:704
llvm::MachineFunction::cloneInfo
Ty * cloneInfo(const Ty &Old)
Definition:MachineFunction.h:840
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition:MachineRegisterInfo.h:51
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition:MachineRegisterInfo.cpp:635
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition:MapVector.h:165
llvm::MapVector::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition:MapVector.h:141
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition:MemoryBuffer.h:51
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition:MemoryBuffer.h:76
llvm::Register
Wrapper class representing virtual and physical registers.
Definition:Register.h:19
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition:Register.h:84
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition:SIMachineFunctionInfo.h:390
llvm::SIMachineFunctionInfo::usesAGPRs
bool usesAGPRs(const MachineFunction &MF) const
Definition:SIMachineFunctionInfo.cpp:787
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition:SIMachineFunctionInfo.cpp:741
llvm::SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
Definition:SIMachineFunctionInfo.cpp:332
llvm::SIMachineFunctionInfo::addPrivateSegmentSize
Register addPrivateSegmentSize(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:236
llvm::SIMachineFunctionInfo::allocateWWMSpill
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Definition:SIMachineFunctionInfo.cpp:284
llvm::SIMachineFunctionInfo::addDispatchPtr
Register addDispatchPtr(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:200
llvm::SIMachineFunctionInfo::getLongBranchReservedReg
Register getLongBranchReservedReg() const
Definition:SIMachineFunctionInfo.h:973
llvm::SIMachineFunctionInfo::addFlatScratchInit
Register addFlatScratchInit(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:229
llvm::SIMachineFunctionInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition:SIMachineFunctionInfo.h:1087
llvm::SIMachineFunctionInfo::getSGPRSpillPhysVGPRs
ArrayRef< Register > getSGPRSpillPhysVGPRs() const
Definition:SIMachineFunctionInfo.h:626
llvm::SIMachineFunctionInfo::getScavengeFI
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:582
llvm::SIMachineFunctionInfo::addQueuePtr
Register addQueuePtr(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:207
llvm::SIMachineFunctionInfo::SIMachineFunctionInfo
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
llvm::SIMachineFunctionInfo::getGITPtrLoReg
Register getGITPtrLoReg(const MachineFunction &MF) const
Definition:SIMachineFunctionInfo.cpp:613
llvm::SIMachineFunctionInfo::allocateVGPRSpillToAGPR
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
Definition:SIMachineFunctionInfo.cpp:471
llvm::SIMachineFunctionInfo::splitWWMSpillRegisters
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Definition:SIMachineFunctionInfo.cpp:309
llvm::SIMachineFunctionInfo::getSGPRForEXECCopy
Register getSGPRForEXECCopy() const
Definition:SIMachineFunctionInfo.h:738
llvm::SIMachineFunctionInfo::mayUseAGPRs
bool mayUseAGPRs(const Function &F) const
Definition:SIMachineFunctionInfo.cpp:783
llvm::SIMachineFunctionInfo::isCalleeSavedReg
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Definition:SIMachineFunctionInfo.cpp:322
llvm::SIMachineFunctionInfo::addLDSKernelId
Register addLDSKernelId()
Definition:SIMachineFunctionInfo.cpp:249
llvm::SIMachineFunctionInfo::getVGPRForAGPRCopy
Register getVGPRForAGPRCopy() const
Definition:SIMachineFunctionInfo.h:581
llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Definition:SIMachineFunctionInfo.cpp:424
llvm::SIMachineFunctionInfo::addKernargSegmentPtr
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:214
llvm::SIMachineFunctionInfo::addDispatchID
Register addDispatchID(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:222
llvm::SIMachineFunctionInfo::removeDeadFrameIndices
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
Definition:SIMachineFunctionInfo.cpp:537
llvm::SIMachineFunctionInfo::clone
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
Definition:SIMachineFunctionInfo.cpp:178
llvm::SIMachineFunctionInfo::checkIndexInPrologEpilogSGPRSpills
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
Definition:SIMachineFunctionInfo.h:683
llvm::SIMachineFunctionInfo::addPrivateSegmentBuffer
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:191
llvm::SIMachineFunctionInfo::getWWMReservedRegs
const ReservedRegSet & getWWMReservedRegs() const
Definition:SIMachineFunctionInfo.h:629
llvm::SIMachineFunctionInfo::getOptionalScavengeFI
std::optional< int > getOptionalScavengeFI() const
Definition:SIMachineFunctionInfo.h:775
llvm::SIMachineFunctionInfo::addImplicitBufferPtr
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
Definition:SIMachineFunctionInfo.cpp:242
llvm::SIMachineFunctionInfo::limitOccupancy
void limitOccupancy(const MachineFunction &MF)
Definition:SIMachineFunctionInfo.cpp:185
llvm::SIMachineFunctionInfo::addPreloadedKernArg
SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)
Definition:SIMachineFunctionInfo.cpp:255
llvm::SIMachineFunctionInfo::reserveWWMRegister
void reserveWWMRegister(Register Reg)
Definition:SIMachineFunctionInfo.h:605
llvm::SIRegisterInfo
Definition:SIRegisterInfo.h:32
llvm::SIRegisterInfo::isChainScratchRegister
static bool isChainScratchRegister(Register VGPR)
Definition:SIRegisterInfo.cpp:452
llvm::SIRegisterInfo::isAGPRClass
static bool isAGPRClass(const TargetRegisterClass *RC)
Definition:SIRegisterInfo.h:223
llvm::SITargetLowering
Definition:SIISelLowering.h:31
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition:SourceMgr.h:281
llvm::SMLoc
Represents a location in source code.
Definition:SMLoc.h:23
llvm::SMRange
Represents a range in source code.
Definition:SMLoc.h:48
llvm::SetVector::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition:SetVector.h:188
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition:SetVector.h:162
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::const_iterator
typename SuperClass::const_iterator const_iterator
Definition:SmallVector.h:578
llvm::SmallVectorTemplateCommon::end
iterator end()
Definition:SmallVector.h:269
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition:SourceMgr.h:132
llvm::SourceMgr::DK_Error
@ DK_Error
Definition:SourceMgr.h:34
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition:SourceMgr.h:125
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::consumeInteger
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition:StringRef.h:499
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition:StringRef.h:147
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition:TargetLowering.h:364
llvm::TargetRegisterClass
Definition:TargetRegisterInfo.h:44
llvm::TargetRegisterClass::getRegisters
ArrayRef< MCPhysReg > getRegisters() const
Definition:TargetRegisterInfo.h:83
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition:TargetRegisterInfo.h:235
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition:raw_ostream.h:661
uint16_t
uint32_t
uint64_t
unsigned
false
Definition:StackSlotColoring.cpp:193
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.cpp:2066
llvm::AMDGPU::isChainCC
bool isChainCC(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.cpp:2092
llvm::AMDGPU::getInitialPSInputAddr
unsigned getInitialPSInputAddr(const Function &F)
Definition:AMDGPUBaseInfo.cpp:2026
llvm::AMDGPU::isGraphics
bool isGraphics(CallingConv::ID cc)
Definition:AMDGPUBaseInfo.cpp:2058
llvm::ARM_MB::ST
@ ST
Definition:ARMBaseInfo.h:73
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition:CallingConv.h:197
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition:CallingConv.h:200
llvm::CallingConv::AMDGPU_Gfx
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition:CallingConv.h:232
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition:CallingConv.h:206
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition:CallingConv.h:191
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition:CallingConv.h:194
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition:CallingConv.h:144
llvm::TargetStackID::SGPRSpill
@ SGPRSpill
Definition:TargetFrameLowering.h:31
llvm::TargetStackID::Default
@ Default
Definition:TargetFrameLowering.h:30
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition:MCRegister.h:21
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition:STLExtras.h:657
llvm::DefaultMemoryClusterDWordsLimit
constexpr unsigned DefaultMemoryClusterDWordsLimit
Definition:SIInstrInfo.h:39
llvm::toString
const char * toString(DWARFSectionKind Kind)
Definition:DWARFUnitIndex.h:67
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition:TargetRegisterInfo.cpp:107
llvm::AMDGPUFunctionArgInfo
Definition:AMDGPUArgumentUsageInfo.h:103
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39
llvm::ArgDescriptor
Definition:AMDGPUArgumentUsageInfo.h:25
llvm::ArgDescriptor::createRegister
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition:AMDGPUArgumentUsageInfo.h:46
llvm::ArgInfo
Helper struct shared between Function Specialization and SCCP Solver.
Definition:SCCPSolver.h:41
llvm::MachineFunctionInfo
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
Definition:MachineFunction.h:104
llvm::PerFunctionMIParsingState
Definition:MIParser.h:165
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition:MIParser.h:168
llvm::yaml::FrameIndex
A serializable representation of a reference to a stack object or fixed stack object.
Definition:MIRYamlMapping.h:417
llvm::yaml::MappingTraits
Definition:ModuleSummaryIndex.h:54
llvm::yaml::SIArgumentInfo
Definition:SIMachineFunctionInfo.h:165
llvm::yaml::SIArgumentInfo::PrivateSegmentWaveByteOffset
std::optional< SIArgument > PrivateSegmentWaveByteOffset
Definition:SIMachineFunctionInfo.h:179
llvm::yaml::SIArgumentInfo::WorkGroupIDY
std::optional< SIArgument > WorkGroupIDY
Definition:SIMachineFunctionInfo.h:175
llvm::yaml::SIArgumentInfo::FlatScratchInit
std::optional< SIArgument > FlatScratchInit
Definition:SIMachineFunctionInfo.h:171
llvm::yaml::SIArgumentInfo::DispatchPtr
std::optional< SIArgument > DispatchPtr
Definition:SIMachineFunctionInfo.h:167
llvm::yaml::SIArgumentInfo::DispatchID
std::optional< SIArgument > DispatchID
Definition:SIMachineFunctionInfo.h:170
llvm::yaml::SIArgumentInfo::WorkItemIDY
std::optional< SIArgument > WorkItemIDY
Definition:SIMachineFunctionInfo.h:185
llvm::yaml::SIArgumentInfo::WorkGroupIDX
std::optional< SIArgument > WorkGroupIDX
Definition:SIMachineFunctionInfo.h:174
llvm::yaml::SIArgumentInfo::ImplicitArgPtr
std::optional< SIArgument > ImplicitArgPtr
Definition:SIMachineFunctionInfo.h:181
llvm::yaml::SIArgumentInfo::QueuePtr
std::optional< SIArgument > QueuePtr
Definition:SIMachineFunctionInfo.h:168
llvm::yaml::SIArgumentInfo::WorkGroupInfo
std::optional< SIArgument > WorkGroupInfo
Definition:SIMachineFunctionInfo.h:177
llvm::yaml::SIArgumentInfo::LDSKernelId
std::optional< SIArgument > LDSKernelId
Definition:SIMachineFunctionInfo.h:178
llvm::yaml::SIArgumentInfo::ImplicitBufferPtr
std::optional< SIArgument > ImplicitBufferPtr
Definition:SIMachineFunctionInfo.h:182
llvm::yaml::SIArgumentInfo::WorkItemIDX
std::optional< SIArgument > WorkItemIDX
Definition:SIMachineFunctionInfo.h:184
llvm::yaml::SIArgumentInfo::KernargSegmentPtr
std::optional< SIArgument > KernargSegmentPtr
Definition:SIMachineFunctionInfo.h:169
llvm::yaml::SIArgumentInfo::WorkItemIDZ
std::optional< SIArgument > WorkItemIDZ
Definition:SIMachineFunctionInfo.h:186
llvm::yaml::SIArgumentInfo::PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentSize
Definition:SIMachineFunctionInfo.h:172
llvm::yaml::SIArgumentInfo::PrivateSegmentBuffer
std::optional< SIArgument > PrivateSegmentBuffer
Definition:SIMachineFunctionInfo.h:166
llvm::yaml::SIArgumentInfo::WorkGroupIDZ
std::optional< SIArgument > WorkGroupIDZ
Definition:SIMachineFunctionInfo.h:176
llvm::yaml::SIArgument
Definition:SIMachineFunctionInfo.h:91
llvm::yaml::SIArgument::StackOffset
unsigned StackOffset
Definition:SIMachineFunctionInfo.h:95
llvm::yaml::SIArgument::Mask
std::optional< unsigned > Mask
Definition:SIMachineFunctionInfo.h:97
llvm::yaml::SIArgument::RegisterName
StringValue RegisterName
Definition:SIMachineFunctionInfo.h:94
llvm::yaml::SIArgument::createArgument
static SIArgument createArgument(bool IsReg)
Definition:SIMachineFunctionInfo.h:132
llvm::yaml::SIMachineFunctionInfo
Definition:SIMachineFunctionInfo.h:260
llvm::yaml::SIMachineFunctionInfo::MaxMemoryClusterDWords
unsigned MaxMemoryClusterDWords
Definition:SIMachineFunctionInfo.h:292
llvm::yaml::SIMachineFunctionInfo::HasSpilledVGPRs
bool HasSpilledVGPRs
Definition:SIMachineFunctionInfo.h:272
llvm::yaml::SIMachineFunctionInfo::SGPRForEXECCopy
StringValue SGPRForEXECCopy
Definition:SIMachineFunctionInfo.h:297
llvm::yaml::SIMachineFunctionInfo::HasSpilledSGPRs
bool HasSpilledSGPRs
Definition:SIMachineFunctionInfo.h:271
llvm::yaml::SIMachineFunctionInfo::ReturnsVoid
bool ReturnsVoid
Definition:SIMachineFunctionInfo.h:286
llvm::yaml::SIMachineFunctionInfo::DynLDSAlign
Align DynLDSAlign
Definition:SIMachineFunctionInfo.h:265
llvm::yaml::SIMachineFunctionInfo::WWMReservedRegs
SmallVector< StringValue > WWMReservedRegs
Definition:SIMachineFunctionInfo.h:279
llvm::yaml::SIMachineFunctionInfo::HighBitsOf32BitAddress
uint32_t HighBitsOf32BitAddress
Definition:SIMachineFunctionInfo.h:273
llvm::yaml::SIMachineFunctionInfo::GDSSize
uint32_t GDSSize
Definition:SIMachineFunctionInfo.h:264
llvm::yaml::SIMachineFunctionInfo::MemoryBound
bool MemoryBound
Definition:SIMachineFunctionInfo.h:269
llvm::yaml::SIMachineFunctionInfo::Occupancy
unsigned Occupancy
Definition:SIMachineFunctionInfo.h:276
llvm::yaml::SIMachineFunctionInfo::PSInputEnable
unsigned PSInputEnable
Definition:SIMachineFunctionInfo.h:291
llvm::yaml::SIMachineFunctionInfo::MaxKernArgAlign
Align MaxKernArgAlign
Definition:SIMachineFunctionInfo.h:262
llvm::yaml::SIMachineFunctionInfo::SIMachineFunctionInfo
SIMachineFunctionInfo()=default
llvm::yaml::SIMachineFunctionInfo::LongBranchReservedReg
StringValue LongBranchReservedReg
Definition:SIMachineFunctionInfo.h:298
llvm::yaml::SIMachineFunctionInfo::ExplicitKernArgSize
uint64_t ExplicitKernArgSize
Definition:SIMachineFunctionInfo.h:261
llvm::yaml::SIMachineFunctionInfo::LDSSize
uint32_t LDSSize
Definition:SIMachineFunctionInfo.h:263
llvm::yaml::SIMachineFunctionInfo::WaveLimiter
bool WaveLimiter
Definition:SIMachineFunctionInfo.h:270
llvm::yaml::SIMachineFunctionInfo::PSInputAddr
unsigned PSInputAddr
Definition:SIMachineFunctionInfo.h:290
llvm::yaml::SIMachineFunctionInfo::mappingImpl
void mappingImpl(yaml::IO &YamlIO) override
Definition:SIMachineFunctionInfo.cpp:737
llvm::yaml::SIMachineFunctionInfo::NoSignedZerosFPMath
bool NoSignedZerosFPMath
Definition:SIMachineFunctionInfo.h:268
llvm::yaml::SIMachineFunctionInfo::IsEntryFunction
bool IsEntryFunction
Definition:SIMachineFunctionInfo.h:266
llvm::yaml::SIMachineFunctionInfo::VGPRForAGPRCopy
StringValue VGPRForAGPRCopy
Definition:SIMachineFunctionInfo.h:296
llvm::yaml::SIMachineFunctionInfo::SpillPhysVGPRS
SmallVector< StringValue, 2 > SpillPhysVGPRS
Definition:SIMachineFunctionInfo.h:278
llvm::yaml::SIMachineFunctionInfo::ScavengeFI
std::optional< FrameIndex > ScavengeFI
Definition:SIMachineFunctionInfo.h:295
llvm::yaml::SIMachineFunctionInfo::BytesInStackArgArea
unsigned BytesInStackArgArea
Definition:SIMachineFunctionInfo.h:285
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition:MIRYamlMapping.h:34
llvm::yaml::StringValue::Value
std::string Value
Definition:MIRYamlMapping.h:35

Generated on Fri Jul 18 2025 13:25:36 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp