//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"

using namespace llvm;

namespace {

/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
                                    Register ValVReg, const CCValAssign &VA) {
  if (VA.getLocVT().getSizeInBits() < 32) {
    // 16-bit types are reported as legal for 32-bit registers. We need to
    // extend and do a 32-bit copy to avoid the verifier complaining about it.
    return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
  }

  return Handler.extendRegister(ValVReg, VA);
}

struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
  AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             MachineInstrBuilder MIB)
      : OutgoingValueHandler(B, MRI), MIB(MIB) {}
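  // MIB is the return (or call) instruction under construction. Once a value
  // has been copied into its assigned physical register, that register is
  // attached to MIB as an implicit operand so it stays live across the
  // instruction.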
  void assignValueToReg(Register ValVReg, Register PhysReg,
                        const CCValAssign &VA) override {
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the
    // value ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
        = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      LLT Ty = MRI.getType(ExtReg);
      LLT S32 = LLT::scalar(32);
      if (Ty != S32) {
        // FIXME: We should probably support readfirstlane intrinsics with all
        // legal 32-bit types.
        assert(Ty.getSizeInBits() == 32);
        if (Ty.isPointer())
          ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
        else
          ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
      }

      auto ToSGPR = MIRBuilder
                        .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                        {MRI.getType(ExtReg)})
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};
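// Note: the readfirstlane above makes the value wave-uniform before it is
// copied into an SGPR return register; a plain COPY from a potentially
// divergent VGPR value into an SGPR would not be correct.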
struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
  AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : IncomingValueHandler(B, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();
    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        const CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

      // If we have signext/zeroext, it applies to the whole 32-bit register
      // before truncation.
      auto Extended =
          buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
      MIRBuilder.buildTrunc(ValVReg, Extended);
      return;
    }

    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            const MachinePointerInfo &MPO,
                            const CCValAssign &VA) override {
    MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
        inferAlignFromPtrInfo(MIRBuilder.getMF(), MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }
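  // Incoming stack-passed arguments thus become loads from fixed frame-index
  // objects created in getStackAddress above; register-passed arguments go
  // through assignValueToReg instead.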
  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};

struct FormalArgHandler : public AMDGPUIncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : AMDGPUIncomingArgHandler(B, MRI) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};
struct CallReturnHandler : public AMDGPUIncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};
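// CallReturnHandler's MIB is the call instruction built in lowerCall; marking
// the returning physical register as an implicit def ties the returned value
// to the call rather than making it a basic-block live-in.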
struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  bool IsTailCall;

  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
                           bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
        IsTailCall(IsTailCall) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);

    if (IsTailCall) {
      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

    if (!SPReg) {
      const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
      if (ST.enableFlatScratch()) {
        // The stack is accessed unswizzled, so we can use a regular copy.
        SPReg = MIRBuilder.buildCopy(PtrTy,
                                     MFI->getStackPtrOffsetReg()).getReg(0);
      } else {
        // The address we produce here, without knowing the use context, is
        // going to be interpreted as a vector address, so we need to convert
        // to a swizzled address.
        SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
                                      {MFI->getStackPtrOffsetReg()}).getReg(0);
      }
    }

    auto OffsetReg = MIRBuilder.buildConstant(LLT::scalar(32), Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }
  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
                            unsigned ValRegIndex, Register Addr, LLT MemTy,
                            const MachinePointerInfo &MPO,
                            const CCValAssign &VA) override {
    Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FullValue
                           ? extendRegister(Arg.Regs[ValRegIndex], VA)
                           : Arg.Regs[ValRegIndex];
    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }
};
} // end anonymous namespace

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}
// From AMDGPUCallLowering::canLowerReturn:
// For shaders. Vector types should be explicitly handled by CC.
// ...
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B, const Value *Val,
                                        ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  // ...
  assert(VRegs.size() == SplitEVTs.size() &&
         "For each split Type there should be exactly one VReg.");

  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
    EVT VT = SplitEVTs[i];
    // ...
    unsigned ExtendOp = TargetOpcode::G_ANYEXT;
    if (RetInfo.Flags[0].isSExt()) {
      assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
      ExtendOp = TargetOpcode::G_SEXT;
    } else if (RetInfo.Flags[0].isZExt()) {
      assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
      ExtendOp = TargetOpcode::G_ZEXT;
    }
    // ...
    if (Reg != RetInfo.Regs[0]) {
      RetInfo.Regs[0] = Reg;
      // Reset the arg flags after modifying Reg.
      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
    }
    // ...
  }

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  OutgoingValueAssigner Assigner(AssignFn);
  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
  return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
                                       CC, F.isVarArg());
}
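// determineAndHandleAssignments applies the calling convention's return
// CCAssignFn to each split piece and lets AMDGPUOutgoingValueHandler copy it
// into the assigned physical register, which is then carried as an implicit
// use of the SI_RETURN / SI_RETURN_TO_EPILOG built below.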
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
                                     ArrayRef<Register> VRegs,
                                     FunctionLoweringInfo &FLI) const {
  // ...
  assert(!Val == VRegs.empty() && "Return value without a vreg");
  // ...

  // Kernels, and shaders that return void, just end the wave.
  B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
  // ...

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
  auto Ret = B.buildInstrNoInsert(ReturnOpc);

  if (!FLI.CanLowerReturn)
    insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
  else if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;
  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

// From AMDGPUCallLowering::lowerParameterPtr:
Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
// ...
B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
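// Kernel arguments live in the kernarg segment, a flat buffer whose base is
// preloaded into a user SGPR pair. An argument's address is therefore just
// that base pointer plus the argument's byte offset, and every kernel
// argument load below is built on top of this helper.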
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
                                        uint64_t Offset,
                                        Align Alignment) const {
  // ...
  for (ArgInfo &SplitArg : SplitArgs) {
    Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
    lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

    if (SplitArg.Flags[0].isPointer()) {
      // Compensate for losing pointeriness in splitValueTypes.
      // ...
    }
    // ...
    assert(SplitArg.Regs.size() == 1);

    B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
    // ...
  }
}
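// Each split piece of the argument is loaded separately from the kernarg
// segment; FieldOffsets (filled in by splitToValueTypes) gives the byte
// offset of each piece relative to the start of the argument.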
// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B,
                                 MachineFunction &MF, const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  // ...
  MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
  // ...
  MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
  // ...
  MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
  // ...
  Register VReg = MRI.createGenericVirtualRegister(P4);
  MRI.addLiveIn(InputPtrReg, VReg);
  B.getMBB().addLiveIn(InputPtrReg);
  B.buildCopy(VReg, InputPtrReg);
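  // The kernarg segment pointer is the one user SGPR that also receives a
  // virtual register here (via MRI.addLiveIn), because lowerParameterPtr
  // later retrieves it with getLiveInVirtReg to address kernel arguments.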
  // ...
  MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
  // ...
  MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
  // ...

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we
  // read these from the dispatch pointer.
}

// From AMDGPUCallLowering::lowerFormalArgumentsKernel:
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
const Align KernArgBaseAlign(16);
// ...

// TODO: Align down to dword alignment and extract bits for extending loads.
for (auto &Arg : F.args()) {
  // TODO: Add support for kernarg preload.
  if (Arg.hasAttribute("amdgpu-hidden-argument")) {
    LLVM_DEBUG(dbgs() << "Preloading hidden arguments is not supported\n");
    return false;
  }

  const bool IsByRef = Arg.hasByRefAttr();
  Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
  unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
  // ...

  MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
  Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
  // ...
  ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
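  // ExplicitArgOffset is a running byte offset into the kernarg segment: each
  // argument is placed at the next offset that satisfies its ABI alignment,
  // and the cursor then advances by the argument's allocation size.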
  if (Arg.use_empty()) {
    ++i;
    continue;
  }
  // ...

  if (IsByRef) {
    unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
    assert(VRegs[i].size() == 1 &&
           "expected only one register for byval pointers");
    if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
      lowerParameterPtr(VRegs[i][0], B, ArgOffset);
    } else {
      Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
      lowerParameterPtr(PtrReg, B, ArgOffset);

      B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
    }
  } else {
    ArgInfo OrigArg(VRegs[i], Arg, i);
    // ...
    lowerParameter(B, OrigArg, ArgOffset, Alignment);
  }
  // ...
}
// From AMDGPUCallLowering::lowerFormalArguments:
// The infrastructure for normal calling convention lowering is essentially
// useless for kernels. We want to avoid any kind of legalization or argument
// splitting.
if (CC == CallingConv::AMDGPU_KERNEL)
  return lowerFormalArgumentsKernel(B, F, VRegs);
// ...

CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
// ...
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
// ...

// FIXME: This probably isn't defined for mesa
// ...
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
// ...

unsigned PSInputNum = 0;

// Insert the hidden sret parameter if the return value won't fit in the
// return registers.
if (!FLI.CanLowerReturn)
  insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);

for (auto &Arg : F.args()) {
  if (DL.getTypeStoreSize(Arg.getType()) == 0)
    continue;

  const bool InReg = Arg.hasAttribute(Attribute::InReg);
  // ...
  if (Arg.hasAttribute(Attribute::SwiftSelf) ||
      Arg.hasAttribute(Attribute::SwiftError) ||
      Arg.hasAttribute(Attribute::Nest))
    return false;
  // ...
  const bool ArgUsed = !Arg.use_empty();
  bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

  if (!SkipArg) {
    Info->markPSInputAllocated(PSInputNum);
    if (ArgUsed)
      Info->markPSInputEnabled(PSInputNum);
  }
// At least one interpolation mode must be enabled or else the GPU will
// hang.
//
// Check PSInputAddr instead of PSInputEnable. The idea is that if the user
// set PSInputAddr, the user wants to enable some bits after the compilation
// based on run-time states. Since we can't know what the final PSInputEna
// will look like, so we shouldn't do anything here and the user should take
// responsibility for the correct programming.
//
// Otherwise, the following restrictions apply:
//   - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
//   - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
//     enabled too.
if ((Info->getPSInputAddr() & 0x7F) == 0 ||
    ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
  // ...
  Info->markPSInputAllocated(0);
  Info->markPSInputEnabled(0);
}

if (Subtarget.isAmdPalOS()) {
  // For isAmdPalOS, the user does not enable some bits after compilation
  // based on run-time states; the register values being generated here are
  // the final ones set in hardware. Therefore we need to apply the
  // workaround to PSInputAddr and PSInputEnable together. (The case where
  // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
  // set up an input arg for a particular interpolation mode, but nothing
  // uses that input arg. Really we should have an earlier pass that removes
  // such an arg.)
  unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
  if ((PsInputBits & 0x7F) == 0 ||
      ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
    Info->markPSInputEnabled(countr_zero(Info->getPSInputAddr()));
}
if (!IsEntryFunc && !IsGraphics) {
  // For the fixed ABI, pass workitem IDs in the last argument register.
  TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}

if (!IsEntryFunc) {
  if (!Subtarget.enableFlatScratch())
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
  TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
// ...

// Start adding system SGPRs.
if (IsEntryFunc)
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);

// When we tail call, we need to check if the callee's arguments will fit on
// the caller's stack. So, whenever we lower formal arguments, we should keep
// track of this information, since we might lower a tail call in this
// function later.
Info->setBytesInStackArgArea(StackSize);
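// The byte count recorded here is what areCalleeOutgoingArgsTailCallable
// later compares against when deciding whether a potential tail call's
// outgoing stack arguments fit in the caller's incoming argument area.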
// Move back to the end of the basic block.
// ...

// From AMDGPUCallLowering::passSpecialInputs:
// If there's no call site, this doesn't correspond to a call from the IR and
// doesn't need implicit inputs.
// ...

// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
static const StringLiteral ImplicitAttrNames[] = {
    "amdgpu-no-dispatch-ptr",
    "amdgpu-no-queue-ptr",
    "amdgpu-no-implicitarg-ptr",
    "amdgpu-no-dispatch-id",
    "amdgpu-no-workgroup-id-x",
    "amdgpu-no-workgroup-id-y",
    "amdgpu-no-workgroup-id-z",
    "amdgpu-no-lds-kernel-id",
};
// ...

for (auto InputID : InputRegs) {
  // If the callee does not use the attribute value, skip copying the value.
  if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
    continue;

  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(InputID);
  // ...
  std::tie(IncomingArg, IncomingArgRC, ArgTy) =
      CallerArgInfo.getPreloadedValue(InputID);
  assert(IncomingArgRC == ArgRC);

  Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

  if (IncomingArg) {
    LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
  } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
    LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
  } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
    std::optional<uint32_t> Id =
        AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
    // ...
  } else {
    // We may have proven the input wasn't needed, although the ABI is
    // requiring it. We just need to allocate the register appropriately.
    MIRBuilder.buildUndef(InputReg);
  }

  if (OutgoingArg->isRegister()) {
    ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
    // ...
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
    return false;
  }
}
// Pack workitem IDs into a single register or pass it as is if already
// packed.
std::tie(OutgoingArg, ArgRC, ArgTy) =
    CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
// ...

const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

// If incoming ids are not packed we need to pack them.
// FIXME: Should consider known workgroup size to eliminate known 0 cases.
Register InputReg;
if (NeedWorkItemIDX) {
  if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
    InputReg = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                       std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
  }
  // ...
}

if (NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
  Register Y = MRI.createGenericVirtualRegister(S32);
  LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                     std::get<2>(WorkitemIDY));
  // ...
}

if (NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
  Register Z = MRI.createGenericVirtualRegister(S32);
  LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                     std::get<2>(WorkitemIDZ));
  // ...
}
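// The loaded Y and Z ids are then shifted left by 10 and 20 bits respectively
// and OR'd into InputReg, producing the packed workitem-id layout the callee
// expects in a single VGPR.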
if (!InputReg &&
    (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
  InputReg = MRI.createGenericVirtualRegister(S32);
  if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
    // We're in a situation where the outgoing function requires the workitem
    // ID, but the calling function does not have it (e.g a graphics function
    // calling a C calling convention function). This is illegal, but we need
    // to produce something.
    MIRBuilder.buildUndef(InputReg);
  } else {
    // Workitem ids are already packed, any of present incoming arguments will
    // carry all required fields.
    ArgDescriptor IncomingArg = ArgDescriptor::createArg(
        IncomingArgX ? *IncomingArgX :
        IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
    LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
                       &AMDGPU::VGPR_32RegClass, S32);
  }
}

if (OutgoingArg->isRegister()) {
  ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
  // ...
} else {
  LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
  return false;
}
/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
/// for CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall, bool isWave32,
                              CallingConv::ID CC) {
  // For calls to amdgpu_cs_chain functions, the address is known to be uniform.
  assert((AMDGPU::isChainCC(CC) || !IsIndirect || !IsTailCall) &&
         "Indirect calls can't be tail calls, "
         "because the address can be divergent");
  if (!IsTailCall)
    return AMDGPU::G_SI_CALL;

  if (AMDGPU::isChainCC(CC))
    return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;

  return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX
                                       : AMDGPU::SI_TCRETURN;
}
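// The chain tail-call pseudos come in wave32/wave64 variants because they
// carry the EXEC mask to install for the callee as an explicit operand, and
// that operand's width depends on the wavefront size.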
// Add operands to call instruction to track the callee.
static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
                                  MachineIRBuilder &MIRBuilder,
                                  AMDGPUCallLowering::CallLoweringInfo &Info) {
  if (Info.Callee.isReg()) {
    CallInst.addReg(Info.Callee.getReg());
    CallInst.addImm(0);
  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
    // The call lowering lightly assumed we can directly encode a call target
    // in the instruction, which is not the case. Materialize the address here.
    const GlobalValue *GV = Info.Callee.getGlobal();
    auto Ptr = MIRBuilder.buildGlobalValue(
        LLT::pointer(GV->getAddressSpace(), 64), GV);
    CallInst.addReg(Ptr.getReg(0));
    CallInst.add(Info.Callee);
  } else
    return false;

  return true;
}

// From AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay:
// If the calling conventions match, then everything must be the same.
if (CalleeCC == CallerCC)
  return true;
// Make sure that the caller and callee preserve all of the same registers.
const auto *TRI = ST.getRegisterInfo();

const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
  return false;

// Check if the caller and callee will handle arguments in the same way.
// ...
std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
    getAssignFnsForCC(CalleeCC, TLI);
// ...
std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
    getAssignFnsForCC(CallerCC, TLI);

// FIXME: We are not accounting for potential differences in implicitly passed
// inputs, but only the fixed ABI is supported now anyway.
IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                     CalleeAssignFnVarArg);
IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                     CallerAssignFnVarArg);
return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
// From AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable:
// If there are no outgoing arguments, then we are done.
// ...
// We have outgoing arguments. Make sure that we can tail call with them.
// ...
// Make sure that they can fit on the caller's stack.
if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
  LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
  return false;
}

// Verify that the parameters in callee-saved registers match.
const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
/// Return true if the calling convention is one that we can guarantee TCO for.
// ...
/// Return true if we might ever do TCO for calls with this calling convention.
// ...

// From AMDGPUCallLowering::isEligibleForTailCallOptimization:
// Must pass all target-independent checks in order to tail call optimize.
if (!Info.IsTailCall)
  return false;

// Indirect calls can't be tail calls, because the address can be divergent.
// TODO Check divergence info if the call really is divergent.
if (Info.Callee.isReg())
  return false;
// ...

const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
// Kernels aren't callable, and don't have a live in return address so it
// doesn't make sense to do a tail call with entry functions.
if (!CallerPreserved)
  return false;

if (!mayTailCallThisCC(CalleeCC)) {
  LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
  return false;
}

if (any_of(CallerF.args(), [](const Argument &A) {
      return A.hasByValAttr() || A.hasSwiftErrorAttr();
    })) {
  LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
                       "or swifterror arguments\n");
  return false;
}

// If we have -tailcallopt, then we're done.
if (MF.getTarget().Options.GuaranteedTailCallOpt)
  return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();

// Verify that the incoming and outgoing arguments from the callee are
// safe to tail call.
if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
  LLVM_DEBUG(
      dbgs()
      << "... Caller and callee have incompatible calling conventions.\n");
  return false;
}

// FIXME: We need to check if any arguments passed in SGPR are uniform. If
// they are not, this cannot be a tail call. If they are uniform, but may be
// VGPR, we need to insert readfirstlanes.
if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
  return false;

LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
return true;
// Insert outgoing implicit arguments for a call, by inserting copies to the
// implicit argument registers and adding the necessary implicit uses to the
// call instruction.
void AMDGPUCallLowering::handleImplicitCallArguments(
    MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
    const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI,
    CallingConv::ID CalleeCC,
    ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
  if (!ST.enableFlatScratch()) {
    // Insert copies for the SRD. In the HSA case, this should be an identity
    // copy.
    auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
                                               MFI.getScratchRSrcReg());

    auto CalleeRSrcReg = AMDGPU::isChainCC(CalleeCC)
                             ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
                             : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

    MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
    CallInst.addReg(CalleeRSrcReg, RegState::Implicit);
  }
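  // The callee expects its scratch buffer resource descriptor in a fixed SGPR
  // quad (s[0:3], or s[48:51] for chain functions), so the caller's descriptor
  // is copied there and attached to the call as an implicit use.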
  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
    CallInst.addUse(ArgReg.first, RegState::Implicit);
    MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
  }
}
// From AMDGPUCallLowering::lowerTailCall:
// True when we're tail calling, but without -tailcallopt.
bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
// ...

// Find out which ABI gets to decide where things go.
// ...
CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
// ...

// Byte offset for the tail call. When we are sibcalling, this will always
// be 0.
// ...

// If this is a chain call, we need to pass in the EXEC mask.
if (AMDGPU::isChainCC(Info.CallConv)) {
  ArgInfo ExecArg = Info.OrigArgs[1];
  assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");
  // ...
  if (const auto *CI = dyn_cast<ConstantInt>(ExecArg.OrigValue)) {
    MIB.addImm(CI->getSExtValue());
  } else {
    MIB.addReg(ExecArg.Regs[0]);
    unsigned Idx = MIB->getNumOperands() - 1;
    MIB->getOperand(Idx).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
        MIB->getDesc(), MIB->getOperand(Idx), Idx));
  }
}

// Tell the call which registers are clobbered.
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
MIB.addRegMask(Mask);
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0.
int FPDiff = 0;

// This will be 0 for sibcalls, potentially nonzero for tail calls produced
// by -tailcallopt. For sibcalls, the memory operands for the call are
// already available in the caller's incoming argument space.
unsigned NumBytes = 0;

if (!IsSibCall) {
  // We aren't sibcalling, so we need to compute FPDiff. We need to do this
  // before handling assignments, because FPDiff must be known for memory
  // arguments.
  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
  // ...

  // FIXME: Not accounting for callee implicit inputs
  // ...

  // The callee will pop the argument stack as a tail call. Thus, we must
  // keep it 16-byte aligned.
  NumBytes = alignTo(OutInfo.getStackSize(), ST.getStackAlignment());

  // FPDiff will be negative if this tail call requires more space than we
  // would automatically have in our incoming argument space. Positive if we
  // actually shrink the stack.
  FPDiff = NumReusableBytes - NumBytes;

  // The stack pointer must be 16-byte aligned at all times it's used for a
  // memory operation, which in practice means at *all* times and in
  // particular across call boundaries. Therefore our own arguments started at
  // a 16-byte aligned SP and the delta applied for the tail call should
  // satisfy the same constraint.
  assert(isAligned(ST.getStackAlignment(), FPDiff) &&
         "unaligned stack on tail call");
}

// We could pass MIB and directly add the implicit uses to the call
// now. However, as an aesthetic choice, place implicit argument operands
// after the ordinary user argument registers.
// ...

// With a fixed ABI, allocate fixed registers before user arguments.
// ...

// Do the actual argument marshalling.
AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
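// With IsTailCall=true, AMDGPUOutgoingArgHandler::getStackAddress writes
// outgoing stack arguments into fixed frame-index objects offset by FPDiff,
// i.e. directly into the slots of the caller's own incoming argument area
// that the callee will reuse.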
if (Info.ConvergenceCtrlToken) {
  MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
}
handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC,
                            ImplicitArgRegs);

// If we have -tailcallopt, we need to adjust the stack. We'll do the call
// sequence start and end here.
if (!IsSibCall) {
  MIB->getOperand(1).setImm(FPDiff);
  CallSeqStart.addImm(NumBytes).addImm(0);
  // End the call sequence *before* emitting the call. Normally, we would
  // tidy the frame up after the call. However, here, we've laid out the
  // parameters so that when SP is reset, they will be in the correct
  // location.
  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
}

// Now we can add the actual call instruction to the correct basic block.
MIRBuilder.insertInstr(MIB);

// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.

// FIXME: We should define regbankselectable call instructions to handle
// divergent call targets.
if (MIB->getOperand(0).isReg()) {
  MIB->getOperand(0).setReg(constrainOperandRegClass(
      MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
      MIB->getDesc(), MIB->getOperand(0), 0));
}

MF.getFrameInfo().setHasTailCall();
Info.LoweredTailCall = true;
return true;
}
/// Lower a call to the @llvm.amdgcn.cs.chain intrinsic.
bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
                                        CallLoweringInfo &Info) const {
  ArgInfo Callee = Info.OrigArgs[0];
  ArgInfo SGPRArgs = Info.OrigArgs[2];
  ArgInfo VGPRArgs = Info.OrigArgs[3];
  ArgInfo Flags = Info.OrigArgs[4];

  assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&
         "Non-zero flags aren't supported yet.");
  assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet.");
  // ...

  // The function to jump to is actually the first argument, so we'll change
  // the Callee and other info to match that before using our existing helper.
  const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
  if (const Function *F = dyn_cast<Function>(CalleeV)) {
    Info.Callee = MachineOperand::CreateGA(F, 0);
    Info.CallConv = F->getCallingConv();
  } else {
    assert(Callee.Regs.size() == 1 && "Too many regs for the callee");
    Info.Callee = MachineOperand::CreateReg(Callee.Regs[0], /*isDef=*/false);
    Info.CallConv = CallingConv::AMDGPU_CS_Chain; // amdgpu_cs_chain_preserve
                                                  // behaves the same here.
  }

  // The function that we're calling cannot be vararg (only the intrinsic is).
  Info.IsVarArg = false;

  assert(
      all_of(SGPRArgs.Flags, [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
      "SGPR arguments should be marked inreg");
  assert(
      none_of(VGPRArgs.Flags, [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
      "VGPR arguments should not be marked inreg");
  // ...

  Info.IsMustTailCall = true;
  // ...
  return lowerTailCall(MIRBuilder, Info, OutArgs);
}
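// Unlike a normal call, nothing has to be emitted after this point: a chain
// "call" transfers control permanently and never returns to the caller, so
// lowerChainCall always funnels into the must-tail-call path.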
// From AMDGPUCallLowering::lowerCall:
if (Function *F = Info.CB->getCalledFunction())
  if (F->isIntrinsic()) {
    assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
           "Unexpected intrinsic");
    return lowerChainCall(MIRBuilder, Info);
  }
// ...
for (auto &OrigArg : Info.OrigArgs)
  splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);

SmallVector<ArgInfo, 8> InArgs;
if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
  splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

// If we can lower as a tail call, do that instead.
bool CanTailCallOpt =
    isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

// We must emit a tail call if we have musttail.
if (Info.IsMustTailCall && !CanTailCallOpt) {
  LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
  return false;
}

Info.IsTailCall = CanTailCallOpt;
if (CanTailCallOpt)
  return lowerTailCall(MIRBuilder, Info, OutArgs);
// Find out which ABI gets to decide where things go.
std::tie(AssignFnFixed, AssignFnVarArg) =
    getAssignFnsForCC(Info.CallConv, TLI);

MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
    .addImm(0)
    .addImm(0);

// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),
                             Info.CallConv);

auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.addDef(TRI->getReturnAddressReg(MF));

if (!Info.IsConvergent)
  MIB.setMIFlag(MachineInstr::NoConvergent);

if (!addCallTargetOperands(MIB, MIRBuilder, Info))
  return false;

// Tell the call which registers are clobbered.
const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
MIB.addRegMask(Mask);
// ...

// We could pass MIB and directly add the implicit uses to the call
// now. However, as an aesthetic choice, place implicit argument operands
// after the ordinary user argument registers.
// ...

// With a fixed ABI, allocate fixed registers before user arguments.
// ...

// Do the actual argument marshalling.
AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
if (Info.ConvergenceCtrlToken) {
  MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
}
handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv,
                            ImplicitArgRegs);

// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getStackSize();
// ...

// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.

// FIXME: We should define regbankselectable call instructions to handle
// divergent call targets.
if (MIB->getOperand(1).isReg()) {
  MIB->getOperand(1).setReg(constrainOperandRegClass(
      MF, *TRI, MRI, *ST.getInstrInfo(),
      *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
      1));
}

// Now we can add the actual call instruction to the correct position.
MIRBuilder.insertInstr(MIB);

// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
  CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
                                                    Info.IsVarArg);
  IncomingValueAssigner Assigner(RetAssignFn);
  CallReturnHandler Handler(MIRBuilder, MRI, MIB);
  if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;
}

uint64_t CalleePopBytes = NumBytes;

MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
    .addImm(0)
    .addImm(CalleePopBytes);

if (!Info.CanLowerReturn) {
  insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                  Info.DemoteRegister, Info.DemoteStackIndex);
}

return true;
}
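// When the return value was demoted to memory (CanLowerReturn is false), the
// callee has stored it through the hidden sret pointer, and insertSRetLoads
// reloads it from the demote stack slot into the original result registers.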