LLVM 20.0.0git
AMDGPUCallLowering.cpp
1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUCallLowering.h"
16 #include "AMDGPU.h"
17 #include "AMDGPULegalizerInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/Analysis.h"
21 #include "llvm/CodeGen/FunctionLoweringInfo.h"
22 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/IR/IntrinsicsAMDGPU.h"
25
26 #define DEBUG_TYPE "amdgpu-call-lowering"
27
28 using namespace llvm;
29
30 namespace {
31
32 /// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
33 static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
34                                     Register ValVReg, const CCValAssign &VA) {
35   if (VA.getLocVT().getSizeInBits() < 32) {
36     // 16-bit types are reported as legal for 32-bit registers. We need to
37     // extend and do a 32-bit copy to avoid the verifier complaining about it.
38     return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
39   }
40
41   return Handler.extendRegister(ValVReg, VA);
42 }
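// For illustration: assuming an incoming s16 value %val assigned to a 32-bit
// location, this helper produces roughly the following generic MIR (register
// names are hypothetical):
//
//   %ext:_(s32) = G_ANYEXT %val:_(s16)
//
// and %ext is what ultimately gets copied into the 32-bit location, which
// keeps the machine verifier happy about size mismatches.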
43
44 struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
45   AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
46                              MachineInstrBuilder MIB)
47       : OutgoingValueHandler(B, MRI), MIB(MIB) {}
48
49   MachineInstrBuilder MIB;
50
51   Register getStackAddress(uint64_t Size, int64_t Offset,
52                            MachinePointerInfo &MPO,
53                            ISD::ArgFlagsTy Flags) override {
54     llvm_unreachable("not implemented");
55   }
56
57   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
58                             const MachinePointerInfo &MPO,
59                             const CCValAssign &VA) override {
60     llvm_unreachable("not implemented");
61   }
62
63   void assignValueToReg(Register ValVReg, Register PhysReg,
64                         const CCValAssign &VA) override {
65     Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
66
67     // If this is a scalar return, insert a readfirstlane just in case the value
68     // ends up in a VGPR.
69     // FIXME: Assert this is a shader return.
70     const SIRegisterInfo *TRI
71       = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
72     if (TRI->isSGPRReg(MRI, PhysReg)) {
73       LLT Ty = MRI.getType(ExtReg);
74       LLT S32 = LLT::scalar(32);
75       if (Ty != S32) {
76         // FIXME: We should probably support readfirstlane intrinsics with all
77         // legal 32-bit types.
78         assert(Ty.getSizeInBits() == 32);
79         if (Ty.isPointer())
80           ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
81         else
82           ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
83       }
84
85       auto ToSGPR = MIRBuilder
86                         .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
87                                         {MRI.getType(ExtReg)})
88                         .addReg(ExtReg);
89       ExtReg = ToSGPR.getReg(0);
90     }
91
92     MIRBuilder.buildCopy(PhysReg, ExtReg);
93     MIB.addUse(PhysReg, RegState::Implicit);
94   }
95 };
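// For illustration: a rough sketch of the MIR this handler emits for a 32-bit
// scalar returned in an SGPR (register names and return opcode are
// illustrative, not taken from a real compilation):
//
//   %v:_(s32) = ...                      ; return value, possibly in a VGPR
//   %u:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %v(s32)
//   $sgpr0 = COPY %u(s32)
//   SI_RETURN_TO_EPILOG implicit $sgpr0
//
// The readfirstlane guarantees the SGPR copy reads a uniform value even if %v
// was computed in a VGPR.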
96
97 struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
98   uint64_t StackUsed = 0;
99
100   AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
101       : IncomingValueHandler(B, MRI) {}
102
103RegistergetStackAddress(uint64_tSize, int64_tOffset,
104MachinePointerInfo &MPO,
105ISD::ArgFlagsTy Flags) override{
106auto &MFI = MIRBuilder.getMF().getFrameInfo();
107
108// Byval is assumed to be writable memory, but other stack passed arguments
109// are not.
110constbool IsImmutable = !Flags.isByVal();
111int FI = MFI.CreateFixedObject(Size,Offset, IsImmutable);
112 MPO =MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
113auto AddrReg = MIRBuilder.buildFrameIndex(
114LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
115 StackUsed = std::max(StackUsed,Size +Offset);
116return AddrReg.getReg(0);
117 }
118
119voidassignValueToReg(Register ValVReg,Register PhysReg,
120constCCValAssign &VA) override{
121 markPhysRegUsed(PhysReg);
122
123if (VA.getLocVT().getSizeInBits() < 32) {
124// 16-bit types are reported as legal for 32-bit registers. We need to do
125// a 32-bit copy, and truncate to avoid the verifier complaining about it.
126auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
127
128// If we have signext/zeroext, it applies to the whole 32-bit register
129// before truncation.
130auto Extended =
131buildExtensionHint(VA, Copy.getReg(0),LLT(VA.getLocVT()));
132 MIRBuilder.buildTrunc(ValVReg, Extended);
133return;
134 }
135
136IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
137 }
138
139voidassignValueToAddress(Register ValVReg,RegisterAddr,LLT MemTy,
140constMachinePointerInfo &MPO,
141constCCValAssign &VA) override{
142MachineFunction &MF = MIRBuilder.getMF();
143
144auto *MMO = MF.getMachineMemOperand(
145 MPO,MachineMemOperand::MOLoad |MachineMemOperand::MOInvariant, MemTy,
146inferAlignFromPtrInfo(MF, MPO));
147 MIRBuilder.buildLoad(ValVReg,Addr, *MMO);
148 }
149
150   /// How the physical register gets marked varies between formal
151   /// parameters (it's a basic-block live-in) and a call instruction
152   /// (it's an implicit-def of the call instruction).
153   virtual void markPhysRegUsed(unsigned PhysReg) = 0;
154};
155
156structFormalArgHandler :public AMDGPUIncomingArgHandler {
157FormalArgHandler(MachineIRBuilder &B,MachineRegisterInfo &MRI)
158 : AMDGPUIncomingArgHandler(B,MRI) {}
159
160void markPhysRegUsed(unsigned PhysReg) override{
161 MIRBuilder.getMBB().addLiveIn(PhysReg);
162 }
163};
164
165structCallReturnHandler :public AMDGPUIncomingArgHandler {
166 CallReturnHandler(MachineIRBuilder &MIRBuilder,MachineRegisterInfo &MRI,
167MachineInstrBuilder MIB)
168 : AMDGPUIncomingArgHandler(MIRBuilder,MRI), MIB(MIB) {}
169
170void markPhysRegUsed(unsigned PhysReg) override{
171 MIB.addDef(PhysReg,RegState::Implicit);
172 }
173
174MachineInstrBuilder MIB;
175};
176
177 struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
178   /// For tail calls, the byte offset of the call's argument area from the
179   /// callee's. Unused elsewhere.
180   int FPDiff;
181
182   // Cache the SP register vreg if we need it more than once in this call site.
183   Register SPReg;
184
185   bool IsTailCall;
186
187 AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
188MachineRegisterInfo &MRI,MachineInstrBuilder MIB,
189bool IsTailCall =false,int FPDiff = 0)
190 : AMDGPUOutgoingValueHandler(MIRBuilder,MRI, MIB), FPDiff(FPDiff),
191 IsTailCall(IsTailCall) {}
192
193Register getStackAddress(uint64_tSize, int64_tOffset,
194MachinePointerInfo &MPO,
195ISD::ArgFlagsTy Flags) override{
196MachineFunction &MF = MIRBuilder.getMF();
197constLLT PtrTy =LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
198constLLTS32 =LLT::scalar(32);
199
200if (IsTailCall) {
201Offset += FPDiff;
202int FI = MF.getFrameInfo().CreateFixedObject(Size,Offset,true);
203auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
204 MPO =MachinePointerInfo::getFixedStack(MF, FI);
205return FIReg.getReg(0);
206 }
207
208constSIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
209
210if (!SPReg) {
211constGCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
212if (ST.enableFlatScratch()) {
213// The stack is accessed unswizzled, so we can use a regular copy.
214 SPReg = MIRBuilder.buildCopy(PtrTy,
215 MFI->getStackPtrOffsetReg()).getReg(0);
216 }else {
217// The address we produce here, without knowing the use context, is going
218// to be interpreted as a vector address, so we need to convert to a
219// swizzled address.
220 SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
221 {MFI->getStackPtrOffsetReg()}).getReg(0);
222 }
223 }
224
225auto OffsetReg = MIRBuilder.buildConstant(S32,Offset);
226
227auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
228 MPO =MachinePointerInfo::getStack(MF,Offset);
229return AddrReg.getReg(0);
230 }
231
232void assignValueToAddress(Register ValVReg,RegisterAddr,LLT MemTy,
233constMachinePointerInfo &MPO,
234constCCValAssign &VA) override{
235MachineFunction &MF = MIRBuilder.getMF();
236uint64_t LocMemOffset = VA.getLocMemOffset();
237constauto &ST = MF.getSubtarget<GCNSubtarget>();
238
239auto *MMO = MF.getMachineMemOperand(
240 MPO,MachineMemOperand::MOStore, MemTy,
241commonAlignment(ST.getStackAlignment(), LocMemOffset));
242 MIRBuilder.buildStore(ValVReg,Addr, *MMO);
243 }
244
245void assignValueToAddress(constCallLowering::ArgInfo &Arg,
246unsigned ValRegIndex,RegisterAddr,LLT MemTy,
247constMachinePointerInfo &MPO,
248constCCValAssign &VA) override{
249Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
250 ? extendRegister(Arg.Regs[ValRegIndex], VA)
251 : Arg.Regs[ValRegIndex];
252 assignValueToAddress(ValVReg,Addr, MemTy, MPO, VA);
253 }
254};
255}// anonymous namespace
256
257AMDGPUCallLowering::AMDGPUCallLowering(constAMDGPUTargetLowering &TLI)
258 :CallLowering(&TLI) {
259}
260
261 // FIXME: Compatibility shim
262 static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
263   switch (MIOpc) {
264   case TargetOpcode::G_SEXT:
265     return ISD::SIGN_EXTEND;
266   case TargetOpcode::G_ZEXT:
267     return ISD::ZERO_EXTEND;
268   case TargetOpcode::G_ANYEXT:
269     return ISD::ANY_EXTEND;
270   default:
271     llvm_unreachable("not an extend opcode");
272   }
273 }
274
275bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
276CallingConv::ID CallConv,
277SmallVectorImpl<BaseArgInfo> &Outs,
278bool IsVarArg) const{
279// For shaders. Vector types should be explicitly handled by CC.
280if (AMDGPU::isEntryFunctionCC(CallConv))
281returntrue;
282
283SmallVector<CCValAssign, 16> ArgLocs;
284constSITargetLowering &TLI = *getTLI<SITargetLowering>();
285CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
286 MF.getFunction().getContext());
287
288returncheckReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
289}
290
291/// Lower the return value for the already existing \p Ret. This assumes that
292/// \p B's insertion point is correct.
293bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
294constValue *Val,ArrayRef<Register> VRegs,
295MachineInstrBuilder &Ret) const{
296if (!Val)
297returntrue;
298
299auto &MF =B.getMF();
300constauto &F = MF.getFunction();
301constDataLayout &DL = MF.getDataLayout();
302MachineRegisterInfo *MRI =B.getMRI();
303LLVMContext &Ctx =F.getContext();
304
305CallingConv::IDCC =F.getCallingConv();
306constSITargetLowering &TLI = *getTLI<SITargetLowering>();
307
308SmallVector<EVT, 8> SplitEVTs;
309ComputeValueVTs(TLI,DL, Val->getType(), SplitEVTs);
310assert(VRegs.size() == SplitEVTs.size() &&
311"For each split Type there should be exactly one VReg.");
312
313SmallVector<ArgInfo, 8> SplitRetInfos;
314
315for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
316EVT VT = SplitEVTs[i];
317RegisterReg = VRegs[i];
318ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx), 0);
319setArgFlags(RetInfo,AttributeList::ReturnIndex,DL,F);
320
321if (VT.isScalarInteger()) {
322unsigned ExtendOp = TargetOpcode::G_ANYEXT;
323if (RetInfo.Flags[0].isSExt()) {
324assert(RetInfo.Regs.size() == 1 &&"expect only simple return values");
325 ExtendOp = TargetOpcode::G_SEXT;
326 }elseif (RetInfo.Flags[0].isZExt()) {
327assert(RetInfo.Regs.size() == 1 &&"expect only simple return values");
328 ExtendOp = TargetOpcode::G_ZEXT;
329 }
330
331EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
332extOpcodeToISDExtOpcode(ExtendOp));
333if (ExtVT != VT) {
334 RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
335LLT ExtTy =getLLTForType(*RetInfo.Ty,DL);
336Reg =B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
337 }
338 }
339
340if (Reg != RetInfo.Regs[0]) {
341 RetInfo.Regs[0] =Reg;
342// Reset the arg flags after modifying Reg.
343setArgFlags(RetInfo,AttributeList::ReturnIndex,DL,F);
344 }
345
346splitToValueTypes(RetInfo, SplitRetInfos,DL,CC);
347 }
348
349CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC,F.isVarArg());
350
351 OutgoingValueAssigner Assigner(AssignFn);
352 AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
353returndetermineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos,B,
354CC,F.isVarArg());
355}
356
357boolAMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,constValue *Val,
358ArrayRef<Register> VRegs,
359FunctionLoweringInfo &FLI) const{
360
361MachineFunction &MF =B.getMF();
362SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
363 MFI->setIfReturnsVoid(!Val);
364
365assert(!Val == VRegs.empty() &&"Return value without a vreg");
366
367CallingConv::IDCC =B.getMF().getFunction().getCallingConv();
368constbool IsShader =AMDGPU::isShader(CC);
369constbool IsWaveEnd =
370 (IsShader && MFI->returnsVoid()) ||AMDGPU::isKernel(CC);
371if (IsWaveEnd) {
372B.buildInstr(AMDGPU::S_ENDPGM)
373 .addImm(0);
374returntrue;
375 }
376
377unsigned ReturnOpc =
378 IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
379auto Ret =B.buildInstrNoInsert(ReturnOpc);
380
381if (!FLI.CanLowerReturn)
382insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
383elseif (!lowerReturnVal(B, Val, VRegs, Ret))
384returnfalse;
385
386// TODO: Handle CalleeSavedRegsViaCopy.
387
388B.insertInstr(Ret);
389returntrue;
390}
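// For illustration: a hedged sketch of what the path above produces for a
// non-shader function returning an i16 value %x (registers are hypothetical):
//
//   %ext:_(s32) = G_ANYEXT %x:_(s16)     ; widened to a full 32-bit location
//   $vgpr0 = COPY %ext(s32)
//   SI_RETURN implicit $vgpr0
//
// Shader calling conventions that return nothing instead take the IsWaveEnd
// path and simply emit S_ENDPGM 0.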
391
392void AMDGPUCallLowering::lowerParameterPtr(Register DstReg,MachineIRBuilder &B,
393uint64_tOffset) const{
394MachineFunction &MF =B.getMF();
395constSIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
396MachineRegisterInfo &MRI = MF.getRegInfo();
397Register KernArgSegmentPtr =
398 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
399Register KernArgSegmentVReg =MRI.getLiveInVirtReg(KernArgSegmentPtr);
400
401auto OffsetReg =B.buildConstant(LLT::scalar(64),Offset);
402
403B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
404}
405
406void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,ArgInfo &OrigArg,
407uint64_tOffset,
408Align Alignment) const{
409MachineFunction &MF =B.getMF();
410constFunction &F = MF.getFunction();
411constDataLayout &DL =F.getDataLayout();
412MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
413
414LLT PtrTy =LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
415
416SmallVector<ArgInfo, 32> SplitArgs;
417SmallVector<uint64_t> FieldOffsets;
418splitToValueTypes(OrigArg, SplitArgs,DL,F.getCallingConv(), &FieldOffsets);
419
420unsignedIdx = 0;
421for (ArgInfo &SplitArg : SplitArgs) {
422Register PtrReg =B.getMRI()->createGenericVirtualRegister(PtrTy);
423 lowerParameterPtr(PtrReg,B,Offset + FieldOffsets[Idx]);
424
425LLT ArgTy =getLLTForType(*SplitArg.Ty,DL);
426if (SplitArg.Flags[0].isPointer()) {
427// Compensate for losing pointeriness in splitValueTypes.
428LLT PtrTy =LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
429 ArgTy.getScalarSizeInBits());
430 ArgTy = ArgTy.isVector() ?LLT::vector(ArgTy.getElementCount(), PtrTy)
431 : PtrTy;
432 }
433
434MachineMemOperand *MMO = MF.getMachineMemOperand(
435 PtrInfo,
436MachineMemOperand::MOLoad |MachineMemOperand::MODereferenceable |
437MachineMemOperand::MOInvariant,
438 ArgTy,commonAlignment(Alignment, FieldOffsets[Idx]));
439
440assert(SplitArg.Regs.size() == 1);
441
442B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
443 ++Idx;
444 }
445}
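// For illustration: a rough sketch of the MIR emitted per split kernel
// argument (the offset, type, and input register are hypothetical):
//
//   %base:_(p4) = COPY $sgpr4_sgpr5              ; kernarg segment pointer
//   %off:_(s64) = G_CONSTANT i64 8
//   %addr:_(p4) = G_PTR_ADD %base, %off(s64)
//   %arg:_(s32) = G_LOAD %addr(p4) :: (dereferenceable invariant load (s32),
//                                      addrspace 4)
//
// The loads are marked invariant and dereferenceable because kernel arguments
// live in read-only constant address space memory.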
446
447// Allocate special inputs passed in user SGPRs.
448staticvoidallocateHSAUserSGPRs(CCState &CCInfo,
449MachineIRBuilder &B,
450MachineFunction &MF,
451constSIRegisterInfo &TRI,
452SIMachineFunctionInfo &Info) {
453// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
454constGCNUserSGPRUsageInfo &UserSGPRInfo =Info.getUserSGPRInfo();
455if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
456Register PrivateSegmentBufferReg =Info.addPrivateSegmentBuffer(TRI);
457 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
458 CCInfo.AllocateReg(PrivateSegmentBufferReg);
459 }
460
461if (UserSGPRInfo.hasDispatchPtr()) {
462Register DispatchPtrReg =Info.addDispatchPtr(TRI);
463 MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
464 CCInfo.AllocateReg(DispatchPtrReg);
465 }
466
467if (UserSGPRInfo.hasQueuePtr()) {
468Register QueuePtrReg =Info.addQueuePtr(TRI);
469 MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
470 CCInfo.AllocateReg(QueuePtrReg);
471 }
472
473if (UserSGPRInfo.hasKernargSegmentPtr()) {
474MachineRegisterInfo &MRI = MF.getRegInfo();
475Register InputPtrReg =Info.addKernargSegmentPtr(TRI);
476constLLT P4 =LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
477Register VReg =MRI.createGenericVirtualRegister(P4);
478MRI.addLiveIn(InputPtrReg, VReg);
479B.getMBB().addLiveIn(InputPtrReg);
480B.buildCopy(VReg, InputPtrReg);
481 CCInfo.AllocateReg(InputPtrReg);
482 }
483
484if (UserSGPRInfo.hasDispatchID()) {
485Register DispatchIDReg =Info.addDispatchID(TRI);
486 MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
487 CCInfo.AllocateReg(DispatchIDReg);
488 }
489
490if (UserSGPRInfo.hasFlatScratchInit()) {
491Register FlatScratchInitReg =Info.addFlatScratchInit(TRI);
492 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
493 CCInfo.AllocateReg(FlatScratchInitReg);
494 }
495
496// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
497// these from the dispatch pointer.
498}
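// For reference (illustrative; the exact layout depends on which inputs are
// enabled): the user SGPRs allocated above are packed at the start of the
// SGPR file in request order, e.g.
//   s[0:3] private segment buffer, s[4:5] dispatch ptr, s[6:7] queue ptr,
//   s[8:9] kernarg segment ptr, ...
// Only the kernarg segment pointer gets a virtual register copy here, because
// argument lowering needs it immediately; the others are just recorded as
// live-ins and allocated in CCInfo.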
499
500boolAMDGPUCallLowering::lowerFormalArgumentsKernel(
501MachineIRBuilder &B,constFunction &F,
502ArrayRef<ArrayRef<Register>> VRegs) const{
503MachineFunction &MF =B.getMF();
504constGCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
505MachineRegisterInfo &MRI = MF.getRegInfo();
506SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
507constSIRegisterInfo *TRI = Subtarget->getRegisterInfo();
508constSITargetLowering &TLI = *getTLI<SITargetLowering>();
509constDataLayout &DL =F.getDataLayout();
510
511SmallVector<CCValAssign, 16> ArgLocs;
512CCState CCInfo(F.getCallingConv(),F.isVarArg(), MF, ArgLocs,F.getContext());
513
514allocateHSAUserSGPRs(CCInfo,B, MF, *TRI, *Info);
515
516unsigned i = 0;
517constAlign KernArgBaseAlign(16);
518constunsigned BaseOffset = Subtarget->getExplicitKernelArgOffset();
519uint64_t ExplicitArgOffset = 0;
520
521// TODO: Align down to dword alignment and extract bits for extending loads.
522for (auto &Arg :F.args()) {
523// TODO: Add support for kernarg preload.
524if (Arg.hasAttribute("amdgpu-hidden-argument")) {
525LLVM_DEBUG(dbgs() <<"Preloading hidden arguments is not supported\n");
526returnfalse;
527 }
528
529constbool IsByRef = Arg.hasByRefAttr();
530Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
531unsigned AllocSize =DL.getTypeAllocSize(ArgTy);
532if (AllocSize == 0)
533continue;
534
535MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
536Align ABIAlign =DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
537
538uint64_t ArgOffset =alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
539 ExplicitArgOffset =alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
540
541if (Arg.use_empty()) {
542 ++i;
543continue;
544 }
545
546Align Alignment =commonAlignment(KernArgBaseAlign, ArgOffset);
547
548if (IsByRef) {
549unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
550
551assert(VRegs[i].size() == 1 &&
552"expected only one register for byval pointers");
553if (ByRefAS ==AMDGPUAS::CONSTANT_ADDRESS) {
554 lowerParameterPtr(VRegs[i][0],B, ArgOffset);
555 }else {
556constLLT ConstPtrTy =LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
557Register PtrReg =MRI.createGenericVirtualRegister(ConstPtrTy);
558 lowerParameterPtr(PtrReg,B, ArgOffset);
559
560B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
561 }
562 }else {
563ArgInfo OrigArg(VRegs[i], Arg, i);
564constunsigned OrigArgIdx = i +AttributeList::FirstArgIndex;
565setArgFlags(OrigArg, OrigArgIdx,DL,F);
566 lowerParameter(B, OrigArg, ArgOffset, Alignment);
567 }
568
569 ++i;
570 }
571
572 TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
573 TLI.allocateSystemSGPRs(CCInfo, MF, *Info,F.getCallingConv(),false);
574returntrue;
575}
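// Worked example of the offset computation above for a hypothetical kernel
// signature amdgpu_kernel void f(i32 %a, <2 x i64> %b, i8 %c), assuming
// BaseOffset = 0:
//   %a: AllocSize = 4,  ABIAlign = 4  -> ArgOffset = 0,  ExplicitArgOffset = 4
//   %b: AllocSize = 16, ABIAlign = 16 -> ArgOffset = 16, ExplicitArgOffset = 32
//   %c: AllocSize = 1,  ABIAlign = 1  -> ArgOffset = 32, ExplicitArgOffset = 33
// Each load is then given commonAlignment(KernArgBaseAlign, ArgOffset), i.e.
// its alignment relative to the 16-byte-aligned kernarg base.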
576
577boolAMDGPUCallLowering::lowerFormalArguments(
578MachineIRBuilder &B,constFunction &F,ArrayRef<ArrayRef<Register>> VRegs,
579FunctionLoweringInfo &FLI) const{
580CallingConv::IDCC =F.getCallingConv();
581
582// The infrastructure for normal calling convention lowering is essentially
583// useless for kernels. We want to avoid any kind of legalization or argument
584// splitting.
585if (CC ==CallingConv::AMDGPU_KERNEL)
586returnlowerFormalArgumentsKernel(B,F, VRegs);
587
588constbool IsGraphics =AMDGPU::isGraphics(CC);
589constbool IsEntryFunc =AMDGPU::isEntryFunctionCC(CC);
590
591MachineFunction &MF =B.getMF();
592MachineBasicBlock &MBB =B.getMBB();
593MachineRegisterInfo &MRI = MF.getRegInfo();
594SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
595constGCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
596constSIRegisterInfo *TRI = Subtarget.getRegisterInfo();
597constDataLayout &DL =F.getDataLayout();
598
599SmallVector<CCValAssign, 16> ArgLocs;
600CCState CCInfo(CC,F.isVarArg(), MF, ArgLocs,F.getContext());
601constGCNUserSGPRUsageInfo &UserSGPRInfo =Info->getUserSGPRInfo();
602
603if (UserSGPRInfo.hasImplicitBufferPtr()) {
604Register ImplicitBufferPtrReg =Info->addImplicitBufferPtr(*TRI);
605 MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
606 CCInfo.AllocateReg(ImplicitBufferPtrReg);
607 }
608
609// FIXME: This probably isn't defined for mesa
610if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
611Register FlatScratchInitReg =Info->addFlatScratchInit(*TRI);
612 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
613 CCInfo.AllocateReg(FlatScratchInitReg);
614 }
615
616SmallVector<ArgInfo, 32> SplitArgs;
617unsignedIdx = 0;
618unsigned PSInputNum = 0;
619
620// Insert the hidden sret parameter if the return value won't fit in the
621// return registers.
622if (!FLI.CanLowerReturn)
623insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister,MRI,DL);
624
625for (auto &Arg :F.args()) {
626if (DL.getTypeStoreSize(Arg.getType()) == 0)
627continue;
628
629constbool InReg = Arg.hasAttribute(Attribute::InReg);
630
631if (Arg.hasAttribute(Attribute::SwiftSelf) ||
632 Arg.hasAttribute(Attribute::SwiftError) ||
633 Arg.hasAttribute(Attribute::Nest))
634returnfalse;
635
636if (CC ==CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
637constbool ArgUsed = !Arg.use_empty();
638bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
639
640if (!SkipArg) {
641Info->markPSInputAllocated(PSInputNum);
642if (ArgUsed)
643Info->markPSInputEnabled(PSInputNum);
644 }
645
646 ++PSInputNum;
647
648if (SkipArg) {
649for (Register R : VRegs[Idx])
650B.buildUndef(R);
651
652 ++Idx;
653continue;
654 }
655 }
656
657ArgInfo OrigArg(VRegs[Idx], Arg,Idx);
658constunsigned OrigArgIdx =Idx +AttributeList::FirstArgIndex;
659setArgFlags(OrigArg, OrigArgIdx,DL,F);
660
661splitToValueTypes(OrigArg, SplitArgs,DL,CC);
662 ++Idx;
663 }
664
665// At least one interpolation mode must be enabled or else the GPU will
666// hang.
667//
668   // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
669   // sets PSInputAddr, the user wants to enable some bits after the compilation
670   // based on run-time states. Since we can't know what the final PSInputEna
671   // will look like, we shouldn't do anything here and the user should take
672   // responsibility for the correct programming.
673//
674// Otherwise, the following restrictions apply:
675// - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
676// - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
677// enabled too.
678if (CC ==CallingConv::AMDGPU_PS) {
679if ((Info->getPSInputAddr() & 0x7F) == 0 ||
680 ((Info->getPSInputAddr() & 0xF) == 0 &&
681Info->isPSInputAllocated(11))) {
682 CCInfo.AllocateReg(AMDGPU::VGPR0);
683 CCInfo.AllocateReg(AMDGPU::VGPR1);
684Info->markPSInputAllocated(0);
685Info->markPSInputEnabled(0);
686 }
687
688if (Subtarget.isAmdPalOS()) {
689// For isAmdPalOS, the user does not enable some bits after compilation
690// based on run-time states; the register values being generated here are
691// the final ones set in hardware. Therefore we need to apply the
692// workaround to PSInputAddr and PSInputEnable together. (The case where
693// a bit is set in PSInputAddr but not PSInputEnable is where the frontend
694// set up an input arg for a particular interpolation mode, but nothing
695// uses that input arg. Really we should have an earlier pass that removes
696// such an arg.)
697unsigned PsInputBits =Info->getPSInputAddr() &Info->getPSInputEnable();
698if ((PsInputBits & 0x7F) == 0 ||
699 ((PsInputBits & 0xF) == 0 &&
700 (PsInputBits >> 11 & 1)))
701Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
702 }
703 }
704
705constSITargetLowering &TLI = *getTLI<SITargetLowering>();
706CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC,F.isVarArg());
707
708if (!MBB.empty())
709B.setInstr(*MBB.begin());
710
711if (!IsEntryFunc && !IsGraphics) {
712// For the fixed ABI, pass workitem IDs in the last argument register.
713 TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
714
715if (!Subtarget.enableFlatScratch())
716 CCInfo.AllocateReg(Info->getScratchRSrcReg());
717 TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
718 }
719
720IncomingValueAssigner Assigner(AssignFn);
721if (!determineAssignments(Assigner, SplitArgs, CCInfo))
722returnfalse;
723
724FormalArgHandler Handler(B,MRI);
725if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs,B))
726returnfalse;
727
728uint64_t StackSize = Assigner.StackSize;
729
730// Start adding system SGPRs.
731if (IsEntryFunc)
732 TLI.allocateSystemSGPRs(CCInfo, MF, *Info,CC, IsGraphics);
733
734// When we tail call, we need to check if the callee's arguments will fit on
735// the caller's stack. So, whenever we lower formal arguments, we should keep
736// track of this information, since we might lower a tail call in this
737// function later.
738Info->setBytesInStackArgArea(StackSize);
739
740// Move back to the end of the basic block.
741B.setMBB(MBB);
742
743returntrue;
744}
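// Worked example of the PS input workaround above (hypothetical values): if a
// pixel shader only reads POS_W_FLOAT, PSInputAddr has only bit 11 set, so
// (PSInputAddr & 0x7F) == 0 and the code force-enables input 0 and reserves
// VGPR0/VGPR1 for it; otherwise the wave would launch with no interpolation
// inputs enabled and the GPU could hang.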
745
746boolAMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
747CCState &CCInfo,
748SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
749CallLoweringInfo &Info) const{
750MachineFunction &MF = MIRBuilder.getMF();
751
752// If there's no call site, this doesn't correspond to a call from the IR and
753// doesn't need implicit inputs.
754if (!Info.CB)
755returntrue;
756
757constAMDGPUFunctionArgInfo *CalleeArgInfo
758 = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
759
760constSIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
761constAMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();
762
763
764// TODO: Unify with private memory register handling. This is complicated by
765// the fact that at least in kernels, the input argument is not necessarily
766// in the same location as the input.
767AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
768AMDGPUFunctionArgInfo::DISPATCH_PTR,
769AMDGPUFunctionArgInfo::QUEUE_PTR,
770AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
771AMDGPUFunctionArgInfo::DISPATCH_ID,
772AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
773AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
774AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
775AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
776 };
777
778staticconstexprStringLiteral ImplicitAttrNames[] = {
779"amdgpu-no-dispatch-ptr",
780"amdgpu-no-queue-ptr",
781"amdgpu-no-implicitarg-ptr",
782"amdgpu-no-dispatch-id",
783"amdgpu-no-workgroup-id-x",
784"amdgpu-no-workgroup-id-y",
785"amdgpu-no-workgroup-id-z",
786"amdgpu-no-lds-kernel-id",
787 };
788
789MachineRegisterInfo &MRI = MF.getRegInfo();
790
791constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
792constAMDGPULegalizerInfo *LI
793 =static_cast<constAMDGPULegalizerInfo*>(ST.getLegalizerInfo());
794
795unsignedI = 0;
796for (auto InputID : InputRegs) {
797constArgDescriptor *OutgoingArg;
798constTargetRegisterClass *ArgRC;
799LLT ArgTy;
800
801// If the callee does not use the attribute value, skip copying the value.
802if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
803continue;
804
805 std::tie(OutgoingArg, ArgRC, ArgTy) =
806 CalleeArgInfo->getPreloadedValue(InputID);
807if (!OutgoingArg)
808continue;
809
810constArgDescriptor *IncomingArg;
811constTargetRegisterClass *IncomingArgRC;
812 std::tie(IncomingArg, IncomingArgRC, ArgTy) =
813 CallerArgInfo.getPreloadedValue(InputID);
814assert(IncomingArgRC == ArgRC);
815
816Register InputReg =MRI.createGenericVirtualRegister(ArgTy);
817
818if (IncomingArg) {
819 LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
820 }elseif (InputID ==AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
821 LI->getImplicitArgPtr(InputReg,MRI, MIRBuilder);
822 }elseif (InputID ==AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
823 std::optional<uint32_t> Id =
824AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
825if (Id) {
826 MIRBuilder.buildConstant(InputReg, *Id);
827 }else {
828 MIRBuilder.buildUndef(InputReg);
829 }
830 }else {
831// We may have proven the input wasn't needed, although the ABI is
832// requiring it. We just need to allocate the register appropriately.
833 MIRBuilder.buildUndef(InputReg);
834 }
835
836if (OutgoingArg->isRegister()) {
837 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
838if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
839report_fatal_error("failed to allocate implicit input argument");
840 }else {
841LLVM_DEBUG(dbgs() <<"Unhandled stack passed implicit input argument\n");
842returnfalse;
843 }
844 }
845
846// Pack workitem IDs into a single register or pass it as is if already
847// packed.
848constArgDescriptor *OutgoingArg;
849constTargetRegisterClass *ArgRC;
850LLT ArgTy;
851
852 std::tie(OutgoingArg, ArgRC, ArgTy) =
853 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
854if (!OutgoingArg)
855 std::tie(OutgoingArg, ArgRC, ArgTy) =
856 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
857if (!OutgoingArg)
858 std::tie(OutgoingArg, ArgRC, ArgTy) =
859 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
860if (!OutgoingArg)
861returnfalse;
862
863auto WorkitemIDX =
864 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
865auto WorkitemIDY =
866 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
867auto WorkitemIDZ =
868 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
869
870constArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
871constArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
872constArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
873constLLTS32 =LLT::scalar(32);
874
875constbool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
876constbool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
877constbool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
878
879// If incoming ids are not packed we need to pack them.
880// FIXME: Should consider known workgroup size to eliminate known 0 cases.
881Register InputReg;
882if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
883 NeedWorkItemIDX) {
884if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
885 InputReg =MRI.createGenericVirtualRegister(S32);
886 LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
887 std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
888 }else {
889 InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
890 }
891 }
892
893if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
894 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
895RegisterY =MRI.createGenericVirtualRegister(S32);
896 LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
897 std::get<2>(WorkitemIDY));
898
899Y = MIRBuilder.buildShl(S32,Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
900 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg,Y).getReg(0) :Y;
901 }
902
903if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
904 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
905Register Z =MRI.createGenericVirtualRegister(S32);
906 LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
907 std::get<2>(WorkitemIDZ));
908
909 Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
910 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
911 }
912
913if (!InputReg &&
914 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
915 InputReg =MRI.createGenericVirtualRegister(S32);
916if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
917       // We're in a situation where the outgoing function requires the workitem
918       // ID, but the calling function does not have it (e.g. a graphics function
919       // calling a C calling convention function). This is illegal, but we need
920       // to produce something.
921 MIRBuilder.buildUndef(InputReg);
922 }else {
923       // Workitem IDs are already packed; any of the present incoming arguments
924       // will carry all required fields.
925ArgDescriptor IncomingArg =ArgDescriptor::createArg(
926 IncomingArgX ? *IncomingArgX :
927 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
928 LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
929 &AMDGPU::VGPR_32RegClass,S32);
930 }
931 }
932
933if (OutgoingArg->isRegister()) {
934if (InputReg)
935 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
936
937if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
938report_fatal_error("failed to allocate implicit input argument");
939 }else {
940LLVM_DEBUG(dbgs() <<"Unhandled stack passed implicit input argument\n");
941returnfalse;
942 }
943
944returntrue;
945}
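// Note on the packing above: the workitem IDs are combined into a single VGPR
// as x | (y << 10) | (z << 20), each field being at most 10 bits wide. For
// example, x = 5, y = 3, z = 1 packs to 5 | (3 << 10) | (1 << 20) = 0x100C05.
// Dimensions whose maximum workitem ID is provably 0 are left out of the OR
// chain entirely.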
946
947/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
948/// CC.
949static std::pair<CCAssignFn *, CCAssignFn *>
950getAssignFnsForCC(CallingConv::IDCC,constSITargetLowering &TLI) {
951return {TLI.CCAssignFnForCall(CC,false), TLI.CCAssignFnForCall(CC,true)};
952}
953
954staticunsignedgetCallOpcode(constMachineFunction &CallerF,bool IsIndirect,
955bool IsTailCall,bool isWave32,
956CallingConv::IDCC) {
957// For calls to amdgpu_cs_chain functions, the address is known to be uniform.
958assert((AMDGPU::isChainCC(CC) || !IsIndirect || !IsTailCall) &&
959"Indirect calls can't be tail calls, "
960"because the address can be divergent");
961if (!IsTailCall)
962return AMDGPU::G_SI_CALL;
963
964if (AMDGPU::isChainCC(CC))
965return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
966
967returnCC ==CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
968 AMDGPU::SI_TCRETURN;
969}
970
971// Add operands to call instruction to track the callee.
972staticbooladdCallTargetOperands(MachineInstrBuilder &CallInst,
973MachineIRBuilder &MIRBuilder,
974AMDGPUCallLowering::CallLoweringInfo &Info) {
975if (Info.Callee.isReg()) {
976CallInst.addReg(Info.Callee.getReg());
977CallInst.addImm(0);
978 }elseif (Info.Callee.isGlobal() &&Info.Callee.getOffset() == 0) {
979// The call lowering lightly assumed we can directly encode a call target in
980// the instruction, which is not the case. Materialize the address here.
981constGlobalValue *GV =Info.Callee.getGlobal();
982autoPtr = MIRBuilder.buildGlobalValue(
983LLT::pointer(GV->getAddressSpace(), 64), GV);
984CallInst.addReg(Ptr.getReg(0));
985CallInst.add(Info.Callee);
986 }else
987returnfalse;
988
989returntrue;
990}
991
992boolAMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay(
993CallLoweringInfo &Info,MachineFunction &MF,
994SmallVectorImpl<ArgInfo> &InArgs) const{
995constFunction &CallerF = MF.getFunction();
996CallingConv::ID CalleeCC =Info.CallConv;
997CallingConv::ID CallerCC = CallerF.getCallingConv();
998
999// If the calling conventions match, then everything must be the same.
1000if (CalleeCC == CallerCC)
1001returntrue;
1002
1003constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1004
1005// Make sure that the caller and callee preserve all of the same registers.
1006constauto *TRI = ST.getRegisterInfo();
1007
1008constuint32_t *CallerPreserved =TRI->getCallPreservedMask(MF, CallerCC);
1009constuint32_t *CalleePreserved =TRI->getCallPreservedMask(MF, CalleeCC);
1010if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1011returnfalse;
1012
1013// Check if the caller and callee will handle arguments in the same way.
1014constSITargetLowering &TLI = *getTLI<SITargetLowering>();
1015CCAssignFn *CalleeAssignFnFixed;
1016CCAssignFn *CalleeAssignFnVarArg;
1017 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1018getAssignFnsForCC(CalleeCC, TLI);
1019
1020CCAssignFn *CallerAssignFnFixed;
1021CCAssignFn *CallerAssignFnVarArg;
1022 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1023getAssignFnsForCC(CallerCC, TLI);
1024
1025// FIXME: We are not accounting for potential differences in implicitly passed
1026// inputs, but only the fixed ABI is supported now anyway.
1027IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
1028 CalleeAssignFnVarArg);
1029IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
1030 CallerAssignFnVarArg);
1031returnresultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
1032}
1033
1034boolAMDGPUCallLowering::areCalleeOutgoingArgsTailCallable(
1035CallLoweringInfo &Info,MachineFunction &MF,
1036SmallVectorImpl<ArgInfo> &OutArgs) const{
1037// If there are no outgoing arguments, then we are done.
1038if (OutArgs.empty())
1039returntrue;
1040
1041constFunction &CallerF = MF.getFunction();
1042CallingConv::ID CalleeCC =Info.CallConv;
1043CallingConv::ID CallerCC = CallerF.getCallingConv();
1044constSITargetLowering &TLI = *getTLI<SITargetLowering>();
1045
1046CCAssignFn *AssignFnFixed;
1047CCAssignFn *AssignFnVarArg;
1048 std::tie(AssignFnFixed, AssignFnVarArg) =getAssignFnsForCC(CalleeCC, TLI);
1049
1050// We have outgoing arguments. Make sure that we can tail call with them.
1051SmallVector<CCValAssign, 16> OutLocs;
1052CCState OutInfo(CalleeCC,false, MF, OutLocs, CallerF.getContext());
1053OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1054
1055if (!determineAssignments(Assigner, OutArgs, OutInfo)) {
1056LLVM_DEBUG(dbgs() <<"... Could not analyze call operands.\n");
1057returnfalse;
1058 }
1059
1060// Make sure that they can fit on the caller's stack.
1061constSIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1062if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
1063LLVM_DEBUG(dbgs() <<"... Cannot fit call operands on caller's stack.\n");
1064returnfalse;
1065 }
1066
1067// Verify that the parameters in callee-saved registers match.
1068constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1069constSIRegisterInfo *TRI = ST.getRegisterInfo();
1070constuint32_t *CallerPreservedMask =TRI->getCallPreservedMask(MF, CallerCC);
1071MachineRegisterInfo &MRI = MF.getRegInfo();
1072returnparametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
1073}
1074
1075/// Return true if the calling convention is one that we can guarantee TCO for.
1076staticboolcanGuaranteeTCO(CallingConv::IDCC) {
1077returnCC ==CallingConv::Fast;
1078}
1079
1080/// Return true if we might ever do TCO for calls with this calling convention.
1081staticboolmayTailCallThisCC(CallingConv::IDCC) {
1082switch (CC) {
1083caseCallingConv::C:
1084caseCallingConv::AMDGPU_Gfx:
1085returntrue;
1086default:
1087returncanGuaranteeTCO(CC);
1088 }
1089}
1090
1091boolAMDGPUCallLowering::isEligibleForTailCallOptimization(
1092MachineIRBuilder &B,CallLoweringInfo &Info,
1093SmallVectorImpl<ArgInfo> &InArgs,SmallVectorImpl<ArgInfo> &OutArgs) const{
1094// Must pass all target-independent checks in order to tail call optimize.
1095if (!Info.IsTailCall)
1096returnfalse;
1097
1098// Indirect calls can't be tail calls, because the address can be divergent.
1099// TODO Check divergence info if the call really is divergent.
1100if (Info.Callee.isReg())
1101returnfalse;
1102
1103MachineFunction &MF =B.getMF();
1104constFunction &CallerF = MF.getFunction();
1105CallingConv::ID CalleeCC =Info.CallConv;
1106CallingConv::ID CallerCC = CallerF.getCallingConv();
1107
1108constSIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1109constuint32_t *CallerPreserved =TRI->getCallPreservedMask(MF, CallerCC);
1110   // Kernels aren't callable, and don't have a live-in return address, so it
1111   // doesn't make sense to do a tail call with entry functions.
1112if (!CallerPreserved)
1113returnfalse;
1114
1115if (!mayTailCallThisCC(CalleeCC)) {
1116LLVM_DEBUG(dbgs() <<"... Calling convention cannot be tail called.\n");
1117returnfalse;
1118 }
1119
1120if (any_of(CallerF.args(), [](constArgument &A) {
1121 return A.hasByValAttr() || A.hasSwiftErrorAttr();
1122 })) {
1123LLVM_DEBUG(dbgs() <<"... Cannot tail call from callers with byval "
1124"or swifterror arguments\n");
1125returnfalse;
1126 }
1127
1128// If we have -tailcallopt, then we're done.
1129if (MF.getTarget().Options.GuaranteedTailCallOpt)
1130returncanGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
1131
1132// Verify that the incoming and outgoing arguments from the callee are
1133// safe to tail call.
1134if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1135LLVM_DEBUG(
1136dbgs()
1137 <<"... Caller and callee have incompatible calling conventions.\n");
1138returnfalse;
1139 }
1140
1141// FIXME: We need to check if any arguments passed in SGPR are uniform. If
1142// they are not, this cannot be a tail call. If they are uniform, but may be
1143// VGPR, we need to insert readfirstlanes.
1144if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1145returnfalse;
1146
1147LLVM_DEBUG(dbgs() <<"... Call is eligible for tail call optimization.\n");
1148returntrue;
1149}
1150
1151// Insert outgoing implicit arguments for a call, by inserting copies to the
1152// implicit argument registers and adding the necessary implicit uses to the
1153// call instruction.
1154voidAMDGPUCallLowering::handleImplicitCallArguments(
1155MachineIRBuilder &MIRBuilder,MachineInstrBuilder &CallInst,
1156constGCNSubtarget &ST,constSIMachineFunctionInfo &FuncInfo,
1157CallingConv::ID CalleeCC,
1158ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const{
1159if (!ST.enableFlatScratch()) {
1160// Insert copies for the SRD. In the HSA case, this should be an identity
1161// copy.
1162auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
1163 FuncInfo.getScratchRSrcReg());
1164
1165auto CalleeRSrcReg =AMDGPU::isChainCC(CalleeCC)
1166 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
1167 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
1168
1169 MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
1170CallInst.addReg(CalleeRSrcReg,RegState::Implicit);
1171 }
1172
1173for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1174 MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
1175CallInst.addReg(ArgReg.first,RegState::Implicit);
1176 }
1177}
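// For illustration: a rough shape of what this inserts immediately before a
// call when flat scratch is disabled (the exact implicit registers depend on
// the callee's calling convention and which implicit inputs it still needs):
//
//   %rsrc:_(<4 x s32>) = COPY <caller scratch rsrc>
//   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %rsrc
//   $vgpr31 = COPY %packed_workitem_ids
//   G_SI_CALL ... implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr31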
1178
1179boolAMDGPUCallLowering::lowerTailCall(
1180MachineIRBuilder &MIRBuilder,CallLoweringInfo &Info,
1181SmallVectorImpl<ArgInfo> &OutArgs) const{
1182MachineFunction &MF = MIRBuilder.getMF();
1183constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1184SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1185constFunction &F = MF.getFunction();
1186MachineRegisterInfo &MRI = MF.getRegInfo();
1187constSITargetLowering &TLI = *getTLI<SITargetLowering>();
1188
1189// True when we're tail calling, but without -tailcallopt.
1190bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
1191
1192// Find out which ABI gets to decide where things go.
1193CallingConv::ID CalleeCC =Info.CallConv;
1194CCAssignFn *AssignFnFixed;
1195CCAssignFn *AssignFnVarArg;
1196 std::tie(AssignFnFixed, AssignFnVarArg) =getAssignFnsForCC(CalleeCC, TLI);
1197
1198MachineInstrBuilder CallSeqStart;
1199if (!IsSibCall)
1200 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
1201
1202unsigned Opc =
1203getCallOpcode(MF,Info.Callee.isReg(),true, ST.isWave32(), CalleeCC);
1204auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1205if (!addCallTargetOperands(MIB, MIRBuilder,Info))
1206returnfalse;
1207
1208// Byte offset for the tail call. When we are sibcalling, this will always
1209// be 0.
1210 MIB.addImm(0);
1211
1212// If this is a chain call, we need to pass in the EXEC mask.
1213constSIRegisterInfo *TRI = ST.getRegisterInfo();
1214if (AMDGPU::isChainCC(Info.CallConv)) {
1215ArgInfo ExecArg =Info.OrigArgs[1];
1216assert(ExecArg.Regs.size() == 1 &&"Too many regs for EXEC");
1217
1218if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize()))
1219returnfalse;
1220
1221if (constauto *CI = dyn_cast<ConstantInt>(ExecArg.OrigValue)) {
1222 MIB.addImm(CI->getSExtValue());
1223 }else {
1224 MIB.addReg(ExecArg.Regs[0]);
1225unsignedIdx = MIB->getNumOperands() - 1;
1226 MIB->getOperand(Idx).setReg(constrainOperandRegClass(
1227 MF, *TRI,MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1228 MIB->getDesc(), MIB->getOperand(Idx),Idx));
1229 }
1230 }
1231
1232// Tell the call which registers are clobbered.
1233constuint32_t *Mask =TRI->getCallPreservedMask(MF, CalleeCC);
1234 MIB.addRegMask(Mask);
1235
1236// FPDiff is the byte offset of the call's argument area from the callee's.
1237// Stores to callee stack arguments will be placed in FixedStackSlots offset
1238// by this amount for a tail call. In a sibling call it must be 0 because the
1239// caller will deallocate the entire stack and the callee still expects its
1240// arguments to begin at SP+0.
1241int FPDiff = 0;
1242
1243// This will be 0 for sibcalls, potentially nonzero for tail calls produced
1244// by -tailcallopt. For sibcalls, the memory operands for the call are
1245// already available in the caller's incoming argument space.
1246unsigned NumBytes = 0;
1247if (!IsSibCall) {
1248// We aren't sibcalling, so we need to compute FPDiff. We need to do this
1249// before handling assignments, because FPDiff must be known for memory
1250// arguments.
1251unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1252SmallVector<CCValAssign, 16> OutLocs;
1253CCState OutInfo(CalleeCC,false, MF, OutLocs,F.getContext());
1254
1255// FIXME: Not accounting for callee implicit inputs
1256OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg);
1257if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1258returnfalse;
1259
1260// The callee will pop the argument stack as a tail call. Thus, we must
1261// keep it 16-byte aligned.
1262 NumBytes =alignTo(OutInfo.getStackSize(), ST.getStackAlignment());
1263
1264// FPDiff will be negative if this tail call requires more space than we
1265// would automatically have in our incoming argument space. Positive if we
1266// actually shrink the stack.
1267 FPDiff = NumReusableBytes - NumBytes;
1268
1269// The stack pointer must be 16-byte aligned at all times it's used for a
1270// memory operation, which in practice means at *all* times and in
1271// particular across call boundaries. Therefore our own arguments started at
1272// a 16-byte aligned SP and the delta applied for the tail call should
1273// satisfy the same constraint.
1274assert(isAligned(ST.getStackAlignment(), FPDiff) &&
1275"unaligned stack on tail call");
1276 }
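    // Hypothetical numbers for the computation above: if the caller reserved
    // 32 bytes of incoming stack argument area (NumReusableBytes = 32) and the
    // callee needs 48 bytes (NumBytes = 48, already 16-byte aligned), then
    // FPDiff = 32 - 48 = -16, i.e. this tail call needs 16 more bytes than the
    // incoming argument area provides.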
1277
1278SmallVector<CCValAssign, 16> ArgLocs;
1279CCState CCInfo(Info.CallConv,Info.IsVarArg, MF, ArgLocs,F.getContext());
1280
1281// We could pass MIB and directly add the implicit uses to the call
1282// now. However, as an aesthetic choice, place implicit argument operands
1283// after the ordinary user argument registers.
1284SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1285
1286if (Info.CallConv !=CallingConv::AMDGPU_Gfx &&
1287 !AMDGPU::isChainCC(Info.CallConv)) {
1288// With a fixed ABI, allocate fixed registers before user arguments.
1289if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs,Info))
1290returnfalse;
1291 }
1292
1293OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1294
1295if (!determineAssignments(Assigner, OutArgs, CCInfo))
1296returnfalse;
1297
1298// Do the actual argument marshalling.
1299 AMDGPUOutgoingArgHandler Handler(MIRBuilder,MRI, MIB,true, FPDiff);
1300if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1301returnfalse;
1302
1303if (Info.ConvergenceCtrlToken) {
1304 MIB.addUse(Info.ConvergenceCtrlToken,RegState::Implicit);
1305 }
1306handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC,
1307 ImplicitArgRegs);
1308
1309// If we have -tailcallopt, we need to adjust the stack. We'll do the call
1310// sequence start and end here.
1311if (!IsSibCall) {
1312 MIB->getOperand(1).setImm(FPDiff);
1313 CallSeqStart.addImm(NumBytes).addImm(0);
1314// End the call sequence *before* emitting the call. Normally, we would
1315// tidy the frame up after the call. However, here, we've laid out the
1316// parameters so that when SP is reset, they will be in the correct
1317// location.
1318 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
1319 }
1320
1321// Now we can add the actual call instruction to the correct basic block.
1322 MIRBuilder.insertInstr(MIB);
1323
1324// If Callee is a reg, since it is used by a target specific
1325// instruction, it must have a register class matching the
1326// constraint of that instruction.
1327
1328// FIXME: We should define regbankselectable call instructions to handle
1329// divergent call targets.
1330if (MIB->getOperand(0).isReg()) {
1331 MIB->getOperand(0).setReg(constrainOperandRegClass(
1332 MF, *TRI,MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1333 MIB->getDesc(), MIB->getOperand(0), 0));
1334 }
1335
1336 MF.getFrameInfo().setHasTailCall();
1337Info.LoweredTailCall =true;
1338returntrue;
1339}
1340
1341/// Lower a call to the @llvm.amdgcn.cs.chain intrinsic.
1342boolAMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
1343CallLoweringInfo &Info) const{
1344ArgInfo Callee =Info.OrigArgs[0];
1345ArgInfo SGPRArgs =Info.OrigArgs[2];
1346ArgInfo VGPRArgs =Info.OrigArgs[3];
1347ArgInfo Flags =Info.OrigArgs[4];
1348
1349assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&
1350"Non-zero flags aren't supported yet.");
1351assert(Info.OrigArgs.size() == 5 &&"Additional args aren't supported yet.");
1352
1353MachineFunction &MF = MIRBuilder.getMF();
1354constFunction &F = MF.getFunction();
1355constDataLayout &DL =F.getDataLayout();
1356
1357// The function to jump to is actually the first argument, so we'll change the
1358// Callee and other info to match that before using our existing helper.
1359constValue *CalleeV = Callee.OrigValue->stripPointerCasts();
1360if (constFunction *F = dyn_cast<Function>(CalleeV)) {
1361Info.Callee =MachineOperand::CreateGA(F, 0);
1362Info.CallConv =F->getCallingConv();
1363 }else {
1364assert(Callee.Regs.size() == 1 &&"Too many regs for the callee");
1365Info.Callee =MachineOperand::CreateReg(Callee.Regs[0],false);
1366Info.CallConv =CallingConv::AMDGPU_CS_Chain;// amdgpu_cs_chain_preserve
1367// behaves the same here.
1368 }
1369
1370// The function that we're calling cannot be vararg (only the intrinsic is).
1371Info.IsVarArg =false;
1372
1373assert(
1374all_of(SGPRArgs.Flags, [](ISD::ArgFlagsTyF) { return F.isInReg(); }) &&
1375"SGPR arguments should be marked inreg");
1376assert(
1377none_of(VGPRArgs.Flags, [](ISD::ArgFlagsTyF) { return F.isInReg(); }) &&
1378"VGPR arguments should not be marked inreg");
1379
1380SmallVector<ArgInfo, 8> OutArgs;
1381splitToValueTypes(SGPRArgs, OutArgs,DL,Info.CallConv);
1382splitToValueTypes(VGPRArgs, OutArgs,DL,Info.CallConv);
1383
1384Info.IsMustTailCall =true;
1385returnlowerTailCall(MIRBuilder,Info, OutArgs);
1386}
1387
1388boolAMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1389CallLoweringInfo &Info) const{
1390if (Function *F =Info.CB->getCalledFunction())
1391if (F->isIntrinsic()) {
1392assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
1393"Unexpected intrinsic");
1394returnlowerChainCall(MIRBuilder,Info);
1395 }
1396
1397if (Info.IsVarArg) {
1398LLVM_DEBUG(dbgs() <<"Variadic functions not implemented\n");
1399returnfalse;
1400 }
1401
1402MachineFunction &MF = MIRBuilder.getMF();
1403constGCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1404constSIRegisterInfo *TRI = ST.getRegisterInfo();
1405
1406constFunction &F = MF.getFunction();
1407MachineRegisterInfo &MRI = MF.getRegInfo();
1408constSITargetLowering &TLI = *getTLI<SITargetLowering>();
1409constDataLayout &DL =F.getDataLayout();
1410
1411SmallVector<ArgInfo, 8> OutArgs;
1412for (auto &OrigArg :Info.OrigArgs)
1413splitToValueTypes(OrigArg, OutArgs,DL,Info.CallConv);
1414
1415SmallVector<ArgInfo, 8> InArgs;
1416if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
1417splitToValueTypes(Info.OrigRet, InArgs,DL,Info.CallConv);
1418
1419// If we can lower as a tail call, do that instead.
1420bool CanTailCallOpt =
1421isEligibleForTailCallOptimization(MIRBuilder,Info, InArgs, OutArgs);
1422
1423// We must emit a tail call if we have musttail.
1424if (Info.IsMustTailCall && !CanTailCallOpt) {
1425LLVM_DEBUG(dbgs() <<"Failed to lower musttail call as tail call\n");
1426returnfalse;
1427 }
1428
1429Info.IsTailCall = CanTailCallOpt;
1430if (CanTailCallOpt)
1431returnlowerTailCall(MIRBuilder,Info, OutArgs);
1432
1433// Find out which ABI gets to decide where things go.
1434CCAssignFn *AssignFnFixed;
1435CCAssignFn *AssignFnVarArg;
1436 std::tie(AssignFnFixed, AssignFnVarArg) =
1437getAssignFnsForCC(Info.CallConv, TLI);
1438
1439 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
1440 .addImm(0)
1441 .addImm(0);
1442
1443// Create a temporarily-floating call instruction so we can add the implicit
1444// uses of arg registers.
1445unsigned Opc =getCallOpcode(MF,Info.Callee.isReg(),false, ST.isWave32(),
1446Info.CallConv);
1447
1448auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1449 MIB.addDef(TRI->getReturnAddressReg(MF));
1450
1451if (!Info.IsConvergent)
1452 MIB.setMIFlag(MachineInstr::NoConvergent);
1453
1454if (!addCallTargetOperands(MIB, MIRBuilder,Info))
1455returnfalse;
1456
1457// Tell the call which registers are clobbered.
1458constuint32_t *Mask =TRI->getCallPreservedMask(MF,Info.CallConv);
1459 MIB.addRegMask(Mask);
1460
1461SmallVector<CCValAssign, 16> ArgLocs;
1462CCState CCInfo(Info.CallConv,Info.IsVarArg, MF, ArgLocs,F.getContext());
1463
1464// We could pass MIB and directly add the implicit uses to the call
1465// now. However, as an aesthetic choice, place implicit argument operands
1466// after the ordinary user argument registers.
1467SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1468
1469if (Info.CallConv !=CallingConv::AMDGPU_Gfx) {
1470// With a fixed ABI, allocate fixed registers before user arguments.
1471if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs,Info))
1472returnfalse;
1473 }
1474
1475// Do the actual argument marshalling.
1476SmallVector<Register, 8> PhysRegs;
1477
1478OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1479if (!determineAssignments(Assigner, OutArgs, CCInfo))
1480returnfalse;
1481
1482 AMDGPUOutgoingArgHandler Handler(MIRBuilder,MRI, MIB,false);
1483if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1484returnfalse;
1485
1486constSIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1487
1488   if (Info.ConvergenceCtrlToken) {
1489     MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
1490   }
1491   handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv,
1492                               ImplicitArgRegs);
1493
1494   // Get a count of how many bytes are to be pushed on the stack.
1495   unsigned NumBytes = CCInfo.getStackSize();
1496
1497   // If Callee is a reg, since it is used by a target specific
1498   // instruction, it must have a register class matching the
1499   // constraint of that instruction.
1500
1501   // FIXME: We should define regbankselectable call instructions to handle
1502   // divergent call targets.
1503   if (MIB->getOperand(1).isReg()) {
1504     MIB->getOperand(1).setReg(constrainOperandRegClass(
1505         MF, *TRI, MRI, *ST.getInstrInfo(),
1506         *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1507         1));
1508   }
1509
1510   // Now we can add the actual call instruction to the correct position.
1511   MIRBuilder.insertInstr(MIB);
1512
1513   // Finally we can copy the returned value back into its virtual-register. In
1514   // symmetry with the arguments, the physical register must be an
1515   // implicit-define of the call instruction.
1516   if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1517     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
1518                                                       Info.IsVarArg);
1519     IncomingValueAssigner Assigner(RetAssignFn);
1520     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1521     if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
1522                                        Info.CallConv, Info.IsVarArg))
1523       return false;
1524   }
1525
1526   uint64_t CalleePopBytes = NumBytes;
1527
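       // Close the call frame. CalleePopBytes mirrors the stack space that was
       // reserved for outgoing arguments above.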
1528   MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1529     .addImm(0)
1530     .addImm(CalleePopBytes);
1531
1532   if (!Info.CanLowerReturn) {
1533     insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1534                     Info.DemoteRegister, Info.DemoteStackIndex);
1535   }
1536
1537   return true;
1538 }
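The sequence above follows a common GlobalISel shape: open the call frame, build the call instruction off to the side so its operands (return-address def, callee, register mask, implicit argument uses) can be appended, insert it at the current point, then close the frame. Below is a minimal, hypothetical C++ sketch of just that bracketing, assuming an initialized MachineIRBuilder and caller-supplied placeholder opcodes; it is an illustration of the pattern, not the actual AMDGPU implementation.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include <cstdint>

// Hypothetical helper: the opcodes, clobber mask, and stack byte count are
// supplied by the caller; nothing here is AMDGPU-specific.
static void emitBracketedCall(llvm::MachineIRBuilder &B, unsigned AdjUpOpc,
                              unsigned CallOpc, unsigned AdjDownOpc,
                              const uint32_t *ClobberMask,
                              uint64_t StackBytes) {
  // Open the call frame before any outgoing arguments are materialized.
  B.buildInstr(AdjUpOpc).addImm(0).addImm(0);

  // Build the call without inserting it, so defs, uses, and the clobber mask
  // can be appended first.
  auto MIB = B.buildInstrNoInsert(CallOpc);
  MIB.addRegMask(ClobberMask);

  // Only now does the call land at the current insertion point.
  B.insertInstr(MIB);

  // Close the call frame, recording how many stack bytes the call consumed.
  B.buildInstr(AdjDownOpc).addImm(0).addImm(StackBytes);
}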