1//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 40 UserSGPRInfo(
F, *STI), WorkGroupIDX(
false), WorkGroupIDY(
false),
42 PrivateSegmentWaveByteOffset(
false), WorkItemIDX(
false),
44 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
46 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(
F);
47 WavesPerEU = ST.getWavesPerEU(
F);
48 MaxNumWorkGroups = ST.getMaxNumWorkGroups(
F);
51 Occupancy = ST.computeOccupancy(
F,
getLDSSize()).second;
54 VRegFlags.reserve(1024);
66 MayNeedAGPRs = ST.hasMAIInsts();
69// Chain functions don't receive an SP from their caller, but are free to 70// set one up. For now, we can use s32 to match what amdgpu_gfx functions 71// would use if called, but this can be revisited. 72// FIXME: Only reserve this if we actually need it. 73 StackPtrOffsetReg = AMDGPU::SGPR32;
75 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
80 ImplicitArgPtr =
false;
85 FrameOffsetReg = AMDGPU::SGPR33;
86 StackPtrOffsetReg = AMDGPU::SGPR32;
88if (!ST.enableFlatScratch()) {
89// Non-entry functions have no special inputs for now, other registers 90// required for scratch access. 91 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
97if (!
F.hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
100 ImplicitArgPtr =
false;
104if (ST.hasGFX90AInsts() &&
105 ST.getMaxNumVGPRs(
F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
107 MayNeedAGPRs =
false;
// We will select all MAI with VGPR operands. 112 ST.hasArchitectedSGPRs())) {
113if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workgroup-id-x"))
116if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-y"))
119if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-z"))
124if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workitem-id-x"))
127if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-y") &&
128 ST.getMaxWorkitemID(
F, 1) != 0)
131if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-z") &&
132 ST.getMaxWorkitemID(
F, 2) != 0)
135if (!IsKernel && !
F.hasFnAttribute(
"amdgpu-no-lds-kernel-id"))
140// X, XY, and XYZ are the only supported combinations, so make sure Y is 145if (!ST.flatScratchIsArchitected()) {
146 PrivateSegmentWaveByteOffset =
true;
148// HS and GS always have the scratch wave offset in SGPR5 on GFX9. 151ArgInfo.PrivateSegmentWaveByteOffset =
156AttributeA =
F.getFnAttribute(
"amdgpu-git-ptr-high");
161A =
F.getFnAttribute(
"amdgpu-32bit-address-high-bits");
162 S =
A.getValueAsString();
166 MaxMemoryClusterDWords =
F.getFnAttributeAsParsedInteger(
169// On GFX908, in order to guarantee copying between AGPRs, we need a scratch 170// VGPR available at all times. For now, reserve highest available VGPR. After 171// RA, shift it to the lowest available unused VGPR if one exists. 172if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
174 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(
F) - 1);
195 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
197returnArgInfo.PrivateSegmentBuffer.getRegister();
202 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
204returnArgInfo.DispatchPtr.getRegister();
209 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
211returnArgInfo.QueuePtr.getRegister();
217 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
219returnArgInfo.KernargSegmentPtr.getRegister();
224 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
226returnArgInfo.DispatchID.getRegister();
231 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
233returnArgInfo.FlatScratchInit.getRegister();
239returnArgInfo.PrivateSegmentSize.getRegister();
244 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
246returnArgInfo.ImplicitBufferPtr.getRegister();
252returnArgInfo.LDSKernelId.getRegister();
257unsigned AllocSizeDWord,
int KernArgIdx,
int PaddingSGPRs) {
259"Preload kernel argument allocated twice.");
260 NumUserSGPRs += PaddingSGPRs;
261// If the available register tuples are aligned with the kernarg to be 262// preloaded use that register, otherwise we need to use a set of SGPRs and 264if (!
ArgInfo.FirstKernArgPreloadReg)
265ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
267TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
269 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
270ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
271 NumUserSGPRs += AllocSizeDWord;
273for (
unsignedI = 0;
I < AllocSizeDWord; ++
I) {
274ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
279// Track the actual number of SGPRs that HW will preload to. 281return &
ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
286// Skip if it is an entry function or the register is already added. 290// Skip if this is a function with the amdgpu_cs_chain or 291// amdgpu_cs_chain_preserve calling convention and this is a scratch register. 292// We never need to allocate a spill for these because we don't even need to 293// restore the inactive lanes for them (they're scratchier than the usual 294// scratch registers). We only need to do this if we have calls to 295// llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since 296// chain functions do not return) and the function did not contain a call to 297// llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes 298// when entering the function). 304 WWMSpills.
insert(std::make_pair(
308// Separate out the callee-saved and scratch registers. 314for (
auto &Reg : WWMSpills) {
316 CalleeSavedRegs.push_back(Reg);
318 ScratchRegs.push_back(Reg);
324for (
unsignedI = 0; CSRegs[
I]; ++
I) {
337for (
unsignedI = 0, E = WWMVGPRs.
size();
I < E; ++
I) {
340TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
341if (!NewReg || NewReg >= Reg)
344MRI.replaceRegWith(Reg, NewReg);
346// Update various tables with the new VGPR. 347 WWMVGPRs[
I] = NewReg;
348 WWMReservedRegs.
remove(Reg);
349 WWMReservedRegs.
insert(NewReg);
350MRI.reserveReg(NewReg,
TRI);
352// Replace the register in SpillPhysVGPRs. This is needed to look for free 353// lanes while spilling special SGPRs like FP, BP, etc. during PEI. 354auto *RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);
355if (RegItr != SpillPhysVGPRs.end()) {
356unsignedIdx = std::distance(SpillPhysVGPRs.begin(), RegItr);
357 SpillPhysVGPRs[
Idx] = NewReg;
360// The generic `determineCalleeSaves` might have set the old register if it 361// is in the CSR range. 362 SavedVGPRs.
reset(Reg);
373bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
378 LaneVGPR =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
379 SpillVGPRs.push_back(LaneVGPR);
381 LaneVGPR = SpillVGPRs.back();
384 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
388bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
395// Find the highest available register if called before RA to ensure the 396// lowest registers are available for allocation. The LaneVGPR, in that 397// case, will be shifted back to the lowest range after VGPR allocation. 398 LaneVGPR =
TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF,
400if (LaneVGPR == AMDGPU::NoRegister) {
401// We have no VGPRs left for spilling SGPRs. Reset because we will not 402// partially spill the SGPR to VGPRs. 403 SGPRSpillsToPhysicalVGPRLanes.erase(FI);
415 SpillPhysVGPRs.push_back(LaneVGPR);
417 LaneVGPR = SpillPhysVGPRs.back();
420 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
426bool IsPrologEpilog) {
427 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
428 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
429 : SGPRSpillsToVirtualVGPRLanes[FI];
431// This has already been allocated. 432if (!SpillLanes.empty())
437unsigned WaveSize = ST.getWavefrontSize();
439unsignedSize = FrameInfo.getObjectSize(FI);
440unsigned NumLanes =
Size / 4;
442if (NumLanes > WaveSize)
446assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
447"not spilling SGPRs to VGPRs");
449unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
450 : NumVirtualVGPRSpillLanes;
452for (
unsignedI = 0;
I < NumLanes; ++
I, ++NumSpillLanes) {
453unsigned LaneIndex = (NumSpillLanes % WaveSize);
455bool Allocated = SpillToPhysVGPRLane
456 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
458 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
468/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI. 469/// Either AGPR is spilled to VGPR or vice versa. 470/// Returns true if a \p FI can be eliminated completely. 480auto &Spill = VGPRToAGPRSpills[FI];
482// This has already been allocated. 483if (!Spill.Lanes.empty())
484return Spill.FullyAllocated;
487unsigned NumLanes =
Size / 4;
488 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
491 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
494auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
496 Spill.FullyAllocated =
true;
498// FIXME: Move allocation logic out of MachineFunctionInfo and initialize 508// TODO: Should include register tuples, but doesn't matter with current 511 OtherUsedRegs.
set(Reg);
513 OtherUsedRegs.
set(Reg);
516for (
intI = NumLanes - 1;
I >= 0; --
I) {
517 NextSpillReg = std::find_if(
519 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
523if (NextSpillReg == Regs.
end()) {
// Registers exhausted 524 Spill.FullyAllocated =
false;
528 OtherUsedRegs.
set(*NextSpillReg);
530MRI.reserveReg(*NextSpillReg,
TRI);
531 Spill.Lanes[
I] = *NextSpillReg++;
534return Spill.FullyAllocated;
539// Remove dead frame indices from function frame, however keep FP & BP since 540// spills for them haven't been inserted yet. And also make sure to remove the 541// frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure, 542// otherwise, it could result in an unexpected side effect and bug, in case of 543// any re-mapping of freed frame indices by later pass(es) like "stack slot 547 SGPRSpillsToVirtualVGPRLanes.erase(R.first);
550// Remove the dead frame indices of CSR SGPRs which are spilled to physical 551// VGPR lanes during SILowerSGPRSpills pass. 552if (!ResetSGPRSpillStackIDs) {
555 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
558bool HaveSGPRToMemory =
false;
560if (ResetSGPRSpillStackIDs) {
561// All other SGPRs must be allocated on the default stack, so reset the 568 HaveSGPRToMemory =
true;
574for (
auto &R : VGPRToAGPRSpills) {
579return HaveSGPRToMemory;
589TRI.getSpillAlign(AMDGPU::SGPR_32RegClass),
false);
593MCPhysReg SIMachineFunctionInfo::getNextUserSGPR()
const{
594assert(NumSystemSGPRs == 0 &&
"System SGPRs must be added after user SGPRs");
595return AMDGPU::SGPR0 + NumUserSGPRs;
598MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR()
const{
599return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
602void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(
Register Reg) {
606void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(
Register NewReg,
608 VRegFlags.grow(NewReg);
609 VRegFlags[NewReg] = VRegFlags[SrcReg];
617Register GitPtrLo = AMDGPU::SGPR0;
// Low GIT address passed in 618if (ST.hasMergedShaders()) {
622// Low GIT address is passed in s8 rather than s0 for an LS+HS or 623// ES+GS merged shader on gfx9+. 624 GitPtrLo = AMDGPU::SGPR8;
643static std::optional<yaml::SIArgumentInfo>
648auto convertArg = [&](std::optional<yaml::SIArgument> &
A,
653// Create a register or stack argument. 655if (Arg.isRegister()) {
660// Check and update the optional mask. 662 SA.
Mask = Arg.getMask();
668// TODO: Need to serialize kernarg preloads. 683ArgInfo.PrivateSegmentWaveByteOffset);
699 : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
700 MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
701 GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()),
702 IsEntryFunction(MFI.isEntryFunction()),
703 NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
704 MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
705 HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
706 HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
707 HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
708 Occupancy(MFI.getOccupancy()),
712 BytesInStackArgArea(MFI.getBytesInStackArgArea()),
713 ReturnsVoid(MFI.returnsVoid()),
715 PSInputAddr(MFI.getPSInputAddr()), PSInputEnable(MFI.getPSInputEnable()),
716 MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()),
717 Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()) {
766// Create a diagnostic for the frame index. 773 SourceRange = YamlMFI.
ScavengeFI->SourceRange;
784return !
F.hasFnAttribute(
"amdgpu-no-agpr");
791if (!mayNeedAGPRs()) {
804for (
unsignedI = 0, E =
MRI.getNumVirtRegs();
I != E; ++
I) {
811if (!RC && !
MRI.use_empty(Reg) &&
MRI.getType(Reg).isValid()) {
812// Defer caching UsesAGPRs, function might not yet have been regbank selected. 817for (
MCRegister Reg : AMDGPU::AGPR_32RegClass) {
818if (
MRI.isPhysRegUsed(Reg)) {
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
uint32_t getLDSSize() const
bool isChainFunction() const
bool hasInitWholeWave() const
bool isEntryFunction() const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
Allocate memory in an ever growing pool, as if by bump-pointer.
Lightweight error class with error context and mandatory checking.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
const SITargetLowering * getTargetLowering() const override
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
Wrapper class representing physical registers. Should be passed by value.
void removeLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
bool hasTailCall() const
Returns true if the function contains a tail call.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int getObjectIndexBegin() const
Return the minimum frame object index.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * cloneInfo(const Ty &Old)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
size_type count(const KeyT &Key) const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
Register addPrivateSegmentSize(const SIRegisterInfo &TRI)
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register addDispatchPtr(const SIRegisterInfo &TRI)
Register getLongBranchReservedReg() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
unsigned getMaxWavesPerEU() const
ArrayRef< Register > getSGPRSpillPhysVGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register addQueuePtr(const SIRegisterInfo &TRI)
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
Register getGITPtrLoReg(const MachineFunction &MF) const
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool mayUseAGPRs(const Function &F) const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Register addLDSKernelId()
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Register addDispatchID(const SIRegisterInfo &TRI)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
const ReservedRegSet & getWWMReservedRegs() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
void limitOccupancy(const MachineFunction &MF)
SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)
void reserveWWMRegister(Register Reg)
static bool isChainScratchRegister(Register VGPR)
static bool isAGPRClass(const TargetRegisterClass *RC)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Represents a location in source code.
Represents a range in source code.
bool remove(const value_type &X)
Remove an item from the set vector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
unsigned getMainFileID() const
const MemoryBuffer * getMemoryBuffer(unsigned i) const
StringRef - Represent a constant reference to a string, i.e.
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
constexpr bool empty() const
empty - Check if the string is empty.
const TargetMachine & getTargetMachine() const
ArrayRef< MCPhysReg > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A raw_ostream that writes to an std::string.
bool isEntryFunctionCC(CallingConv::ID CC)
bool isChainCC(CallingConv::ID CC)
unsigned getInitialPSInputAddr(const Function &F)
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
This is an optimization pass for GlobalISel generic memory operations.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr unsigned DefaultMemoryClusterDWordsLimit
const char * toString(DWARFSectionKind Kind)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
A serializable representation of a reference to a stack object or fixed stack object.
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
static SIArgument createArgument(bool IsReg)
unsigned MaxMemoryClusterDWords
StringValue SGPRForEXECCopy
SmallVector< StringValue > WWMReservedRegs
uint32_t HighBitsOf32BitAddress
SIMachineFunctionInfo()=default
StringValue LongBranchReservedReg
uint64_t ExplicitKernArgSize
void mappingImpl(yaml::IO &YamlIO) override
StringValue VGPRForAGPRCopy
SmallVector< StringValue, 2 > SpillPhysVGPRS
std::optional< FrameIndex > ScavengeFI
unsigned BytesInStackArgArea
A wrapper around std::string which contains a source range that's being set during parsing.