Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14#include "Utils/AMDKernelCodeTUtils.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/MC/MCInstrInfo.h"
25#include "llvm/MC/MCRegisterInfo.h"
26#include "llvm/MC/MCSubtargetInfo.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/TargetParser/TargetParser.h"
29#include <optional>
30
31#define GET_INSTRINFO_NAMED_OPS
32#define GET_INSTRMAP_INFO
33#include "AMDGPUGenInstrInfo.inc"
34
35staticllvm::cl::opt<unsigned>DefaultAMDHSACodeObjectVersion(
36"amdhsa-code-object-version",llvm::cl::Hidden,
37llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
38llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
39"or asm directive still take priority if present)"));
40
namespace {

/// \returns A mask selecting \p Width bits starting at bit position \p Shift.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for the field at bit \p Shift with bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned FieldMask = getBitMask(Shift, Width);
  return (Dst & ~FieldMask) | ((Src << Shift) & FieldMask);
}

/// Unpacks the field at bit \p Shift with bit \p Width from \p Src.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 10;
  return 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 6;
  return 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 0;
  return 4;
}

/// \returns Expcnt bit width (same on all generations).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 4;
  return 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits); only gfx9/gfx10 split the field.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  if (VersionMajor == 9 || VersionMajor == 10)
    return 2;
  return 0;
}

/// \returns Loadcnt bit width (gfx12+ only).
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}

/// \returns Samplecnt bit width (gfx12+ only).
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}

/// \returns Bvhcnt bit width (gfx12+ only).
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 3;
  return 0;
}

/// \returns Dscnt bit width (gfx12+ only).
unsigned getDscntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 0;
}

/// \returns Kmcnt bit width (gfx12+ only).
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 5;
  return 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 8;
  return 0;
}

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

} // end anonymous namespace
156
157namespacellvm {
158
159namespaceAMDGPU {
160
161/// \returns true if the target supports signed immediate offset for SMRD
162/// instructions.
163boolhasSMRDSignedImmOffset(constMCSubtargetInfo &ST) {
164returnisGFX9Plus(ST);
165}
166
167/// \returns True if \p STI is AMDHSA.
168boolisHsaAbi(constMCSubtargetInfo &STI) {
169return STI.getTargetTriple().getOS() ==Triple::AMDHSA;
170}
171
172unsignedgetAMDHSACodeObjectVersion(constModule &M) {
173if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
174 M.getModuleFlag("amdhsa_code_object_version"))) {
175return (unsigned)Ver->getZExtValue() / 100;
176 }
177
178returngetDefaultAMDHSACodeObjectVersion();
179}
180
181unsignedgetDefaultAMDHSACodeObjectVersion() {
182returnDefaultAMDHSACodeObjectVersion;
183}
184
185unsignedgetAMDHSACodeObjectVersion(unsigned ABIVersion) {
186switch (ABIVersion) {
187caseELF::ELFABIVERSION_AMDGPU_HSA_V4:
188return 4;
189caseELF::ELFABIVERSION_AMDGPU_HSA_V5:
190return 5;
191caseELF::ELFABIVERSION_AMDGPU_HSA_V6:
192return 6;
193default:
194returngetDefaultAMDHSACodeObjectVersion();
195 }
196}
197
198uint8_tgetELFABIVersion(constTriple &T,unsigned CodeObjectVersion) {
199if (T.getOS() !=Triple::AMDHSA)
200return 0;
201
202switch (CodeObjectVersion) {
203case 4:
204returnELF::ELFABIVERSION_AMDGPU_HSA_V4;
205case 5:
206returnELF::ELFABIVERSION_AMDGPU_HSA_V5;
207case 6:
208returnELF::ELFABIVERSION_AMDGPU_HSA_V6;
209default:
210report_fatal_error("Unsupported AMDHSA Code Object Version " +
211Twine(CodeObjectVersion));
212 }
213}
214
215unsignedgetMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
216switch (CodeObjectVersion) {
217caseAMDHSA_COV4:
218return 48;
219caseAMDHSA_COV5:
220caseAMDHSA_COV6:
221default:
222returnAMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
223 }
224}
225
226
227// FIXME: All such magic numbers about the ABI should be in a
228// central TD file.
229unsignedgetHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
230switch (CodeObjectVersion) {
231caseAMDHSA_COV4:
232return 24;
233caseAMDHSA_COV5:
234caseAMDHSA_COV6:
235default:
236returnAMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
237 }
238}
239
240unsignedgetDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
241switch (CodeObjectVersion) {
242caseAMDHSA_COV4:
243return 32;
244caseAMDHSA_COV5:
245caseAMDHSA_COV6:
246default:
247returnAMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
248 }
249}
250
251unsignedgetCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
252switch (CodeObjectVersion) {
253caseAMDHSA_COV4:
254return 40;
255caseAMDHSA_COV5:
256caseAMDHSA_COV6:
257default:
258returnAMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
259 }
260}
261
262#define GET_MIMGBaseOpcodesTable_IMPL
263#define GET_MIMGDimInfoTable_IMPL
264#define GET_MIMGInfoTable_IMPL
265#define GET_MIMGLZMappingTable_IMPL
266#define GET_MIMGMIPMappingTable_IMPL
267#define GET_MIMGBiasMappingTable_IMPL
268#define GET_MIMGOffsetMappingTable_IMPL
269#define GET_MIMGG16MappingTable_IMPL
270#define GET_MAIInstInfoTable_IMPL
271#include "AMDGPUGenSearchableTables.inc"
272
273intgetMIMGOpcode(unsigned BaseOpcode,unsigned MIMGEncoding,
274unsigned VDataDwords,unsigned VAddrDwords) {
275constMIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
276 VDataDwords, VAddrDwords);
277returnInfo ?Info->Opcode : -1;
278}
279
280constMIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
281constMIMGInfo *Info =getMIMGInfo(Opc);
282returnInfo ?getMIMGBaseOpcodeInfo(Info->BaseOpcode) :nullptr;
283}
284
285intgetMaskedMIMGOp(unsigned Opc,unsigned NewChannels) {
286constMIMGInfo *OrigInfo =getMIMGInfo(Opc);
287constMIMGInfo *NewInfo =
288 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
289 NewChannels, OrigInfo->VAddrDwords);
290return NewInfo ? NewInfo->Opcode : -1;
291}
292
293unsignedgetAddrSizeMIMGOp(constMIMGBaseOpcodeInfo *BaseOpcode,
294constMIMGDimInfo *Dim,bool IsA16,
295bool IsG16Supported) {
296unsigned AddrWords = BaseOpcode->NumExtraArgs;
297unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
298 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
299if (IsA16)
300 AddrWords +=divideCeil(AddrComponents, 2);
301else
302 AddrWords += AddrComponents;
303
304// Note: For subtargets that support A16 but not G16, enabling A16 also
305// enables 16 bit gradients.
306// For subtargets that support A16 (operand) and G16 (done with a different
307// instruction encoding), they are independent.
308
309if (BaseOpcode->Gradients) {
310if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
311// There are two gradients per coordinate, we pack them separately.
312// For the 3d case,
313// we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
314 AddrWords += alignTo<2>(Dim->NumGradients / 2);
315else
316 AddrWords += Dim->NumGradients;
317 }
318return AddrWords;
319}
320
// Row layouts for the tablegen-generated searchable opcode tables below.

/// MUBUF instruction properties.
struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

/// MTBUF instruction properties.
struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

/// SMEM instruction properties.
struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

/// VOP1/VOP2/VOP3 instruction properties.
struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

/// 64-bit VOPC DPP opcode marker.
struct VOPC64DPPInfo {
  uint16_t Opcode;
};

/// VOPC DPP asm-only opcode marker.
struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

/// VOP3C DPP asm-only opcode marker.
struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

/// Mapping from a base VOP opcode to its VOPD component form.
struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

/// A full VOPD pair: the combined opcode and its X/Y components.
struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

/// True16 opcode marker.
struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

/// DP MACC opcode marker.
struct DPMACCInstructionInfo {
  uint16_t Opcode;
  bool IsDPMACCInstruction;
};

/// FP4/FP8 destination byte-select capability per opcode.
struct FP4FP8DstByteSelInfo {
  uint16_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};
394
395#define GET_MTBUFInfoTable_DECL
396#define GET_MTBUFInfoTable_IMPL
397#define GET_MUBUFInfoTable_DECL
398#define GET_MUBUFInfoTable_IMPL
399#define GET_SMInfoTable_DECL
400#define GET_SMInfoTable_IMPL
401#define GET_VOP1InfoTable_DECL
402#define GET_VOP1InfoTable_IMPL
403#define GET_VOP2InfoTable_DECL
404#define GET_VOP2InfoTable_IMPL
405#define GET_VOP3InfoTable_DECL
406#define GET_VOP3InfoTable_IMPL
407#define GET_VOPC64DPPTable_DECL
408#define GET_VOPC64DPPTable_IMPL
409#define GET_VOPC64DPP8Table_DECL
410#define GET_VOPC64DPP8Table_IMPL
411#define GET_VOPCAsmOnlyInfoTable_DECL
412#define GET_VOPCAsmOnlyInfoTable_IMPL
413#define GET_VOP3CAsmOnlyInfoTable_DECL
414#define GET_VOP3CAsmOnlyInfoTable_IMPL
415#define GET_VOPDComponentTable_DECL
416#define GET_VOPDComponentTable_IMPL
417#define GET_VOPDPairs_DECL
418#define GET_VOPDPairs_IMPL
419#define GET_VOPTrue16Table_DECL
420#define GET_VOPTrue16Table_IMPL
421#define GET_WMMAOpcode2AddrMappingTable_DECL
422#define GET_WMMAOpcode2AddrMappingTable_IMPL
423#define GET_WMMAOpcode3AddrMappingTable_DECL
424#define GET_WMMAOpcode3AddrMappingTable_IMPL
425#define GET_getMFMA_F8F6F4_WithSize_DECL
426#define GET_getMFMA_F8F6F4_WithSize_IMPL
427#define GET_isMFMA_F8F6F4Table_IMPL
428#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
429
430#include "AMDGPUGenSearchableTables.inc"
431
432intgetMTBUFBaseOpcode(unsigned Opc) {
433constMTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
434returnInfo ?Info->BaseOpcode : -1;
435}
436
437intgetMTBUFOpcode(unsigned BaseOpc,unsigned Elements) {
438constMTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
439returnInfo ?Info->Opcode : -1;
440}
441
442intgetMTBUFElements(unsigned Opc) {
443constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
444returnInfo ?Info->elements : 0;
445}
446
447boolgetMTBUFHasVAddr(unsigned Opc) {
448constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
449returnInfo ?Info->has_vaddr :false;
450}
451
452boolgetMTBUFHasSrsrc(unsigned Opc) {
453constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
454returnInfo ?Info->has_srsrc :false;
455}
456
457boolgetMTBUFHasSoffset(unsigned Opc) {
458constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
459returnInfo ?Info->has_soffset :false;
460}
461
462intgetMUBUFBaseOpcode(unsigned Opc) {
463constMUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
464returnInfo ?Info->BaseOpcode : -1;
465}
466
467intgetMUBUFOpcode(unsigned BaseOpc,unsigned Elements) {
468constMUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
469returnInfo ?Info->Opcode : -1;
470}
471
472intgetMUBUFElements(unsigned Opc) {
473constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
474returnInfo ?Info->elements : 0;
475}
476
477boolgetMUBUFHasVAddr(unsigned Opc) {
478constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
479returnInfo ?Info->has_vaddr :false;
480}
481
482boolgetMUBUFHasSrsrc(unsigned Opc) {
483constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
484returnInfo ?Info->has_srsrc :false;
485}
486
487boolgetMUBUFHasSoffset(unsigned Opc) {
488constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
489returnInfo ?Info->has_soffset :false;
490}
491
492boolgetMUBUFIsBufferInv(unsigned Opc) {
493constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
494returnInfo ?Info->IsBufferInv :false;
495}
496
497boolgetMUBUFTfe(unsigned Opc) {
498constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
499returnInfo ?Info->tfe :false;
500}
501
502boolgetSMEMIsBuffer(unsigned Opc) {
503constSMInfo *Info = getSMEMOpcodeHelper(Opc);
504returnInfo ?Info->IsBuffer :false;
505}
506
507boolgetVOP1IsSingle(unsigned Opc) {
508constVOPInfo *Info = getVOP1OpcodeHelper(Opc);
509returnInfo ?Info->IsSingle :true;
510}
511
512boolgetVOP2IsSingle(unsigned Opc) {
513constVOPInfo *Info = getVOP2OpcodeHelper(Opc);
514returnInfo ?Info->IsSingle :true;
515}
516
517boolgetVOP3IsSingle(unsigned Opc) {
518constVOPInfo *Info = getVOP3OpcodeHelper(Opc);
519returnInfo ?Info->IsSingle :true;
520}
521
522boolisVOPC64DPP(unsigned Opc) {
523return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
524}
525
526boolisVOPCAsmOnly(unsigned Opc) {return isVOPCAsmOnlyOpcodeHelper(Opc); }
527
528boolgetMAIIsDGEMM(unsigned Opc) {
529constMAIInstInfo *Info = getMAIInstInfoHelper(Opc);
530returnInfo ?Info->is_dgemm :false;
531}
532
533boolgetMAIIsGFX940XDL(unsigned Opc) {
534constMAIInstInfo *Info = getMAIInstInfoHelper(Opc);
535returnInfo ?Info->is_gfx940_xdl :false;
536}
537
538uint8_tmfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
539switch (EncodingVal) {
540caseMFMAScaleFormats::FP6_E2M3:
541caseMFMAScaleFormats::FP6_E3M2:
542return 6;
543caseMFMAScaleFormats::FP4_E2M1:
544return 4;
545caseMFMAScaleFormats::FP8_E4M3:
546caseMFMAScaleFormats::FP8_E5M2:
547default:
548return 8;
549 }
550
551llvm_unreachable("covered switch over mfma scale formats");
552}
553
554constMFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
555unsigned BLGP,
556unsigned F8F8Opcode) {
557uint8_t SrcANumRegs =mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
558uint8_t SrcBNumRegs =mfmaScaleF8F6F4FormatToNumRegs(BLGP);
559return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
560}
561
562unsignedgetVOPDEncodingFamily(constMCSubtargetInfo &ST) {
563if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
564returnSIEncodingFamily::GFX12;
565if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
566returnSIEncodingFamily::GFX11;
567llvm_unreachable("Subtarget generation does not support VOPD!");
568}
569
570CanBeVOPDgetCanBeVOPD(unsigned Opc) {
571constVOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
572if (Info)
573return {Info->CanBeVOPDX,true};
574return {false,false};
575}
576
577unsignedgetVOPDOpcode(unsigned Opc) {
578constVOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
579returnInfo ?Info->VOPDOp : ~0u;
580}
581
582boolisVOPD(unsigned Opc) {
583returnAMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
584}
585
586boolisMAC(unsigned Opc) {
587return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
588 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
589 Opc == AMDGPU::V_MAC_F32_e64_vi ||
590 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
591 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
592 Opc == AMDGPU::V_MAC_F16_e64_vi ||
593 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
594 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
595 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
596 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
597 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
598 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
599 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
600 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
601 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
602 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
603 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
604 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
605 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
606 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
607 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
608}
609
610boolisPermlane16(unsigned Opc) {
611return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
612 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
613 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
614 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
615 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
616 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
617 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
618 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
619}
620
621boolisCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
622return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
623 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
624 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
625 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
626 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
627 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
628 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
629 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
630 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
631 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
632}
633
634boolisGenericAtomic(unsigned Opc) {
635return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
636 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
637 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
638 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
639 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
640 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
641 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
642 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
643 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
644 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
645 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
646 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
647 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
648 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
649 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
650 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
651 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
652}
653
654boolisTrue16Inst(unsigned Opc) {
655constVOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
656returnInfo ?Info->IsTrue16 :false;
657}
658
659FPTypegetFPDstSelType(unsigned Opc) {
660constFP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
661if (!Info)
662returnFPType::None;
663if (Info->HasFP8DstByteSel)
664returnFPType::FP8;
665if (Info->HasFP4DstByteSel)
666returnFPType::FP4;
667
668returnFPType::None;
669}
670
671unsignedmapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
672constWMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
673returnInfo ?Info->Opcode3Addr : ~0u;
674}
675
676unsignedmapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
677constWMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
678returnInfo ?Info->Opcode2Addr : ~0u;
679}
680
681// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
682// header files, so we need to wrap it in a function that takes unsigned
683// instead.
684intgetMCOpcode(uint16_t Opcode,unsigned Gen) {
685return getMCOpcodeGen(Opcode,static_cast<Subtarget>(Gen));
686}
687
688intgetVOPDFull(unsigned OpX,unsigned OpY,unsigned EncodingFamily) {
689constVOPDInfo *Info =
690 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
691returnInfo ?Info->Opcode : -1;
692}
693
694std::pair<unsigned, unsigned>getVOPDComponents(unsigned VOPDOpcode) {
695constVOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
696assert(Info);
697constauto *OpX = getVOPDBaseFromComponent(Info->OpX);
698constauto *OpY = getVOPDBaseFromComponent(Info->OpY);
699assert(OpX && OpY);
700return {OpX->BaseVOP, OpY->BaseVOP};
701}
702
703namespaceVOPD {
704
705ComponentProps::ComponentProps(constMCInstrDesc &OpDesc) {
706assert(OpDesc.getNumDefs() ==Component::DST_NUM);
707
708assert(OpDesc.getOperandConstraint(Component::SRC0,MCOI::TIED_TO) == -1);
709assert(OpDesc.getOperandConstraint(Component::SRC1,MCOI::TIED_TO) == -1);
710auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2,MCOI::TIED_TO);
711assert(TiedIdx == -1 || TiedIdx ==Component::DST);
712 HasSrc2Acc = TiedIdx != -1;
713
714 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
715assert(SrcOperandsNum <=Component::MAX_SRC_NUM);
716
717auto OperandsNum = OpDesc.getNumOperands();
718unsigned CompOprIdx;
719for (CompOprIdx =Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
720if (OpDesc.operands()[CompOprIdx].OperandType ==AMDGPU::OPERAND_KIMM32) {
721 MandatoryLiteralIdx = CompOprIdx;
722break;
723 }
724 }
725}
726
727unsignedComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const{
728assert(CompOprIdx <Component::MAX_OPR_NUM);
729
730if (CompOprIdx ==Component::DST)
731returngetIndexOfDstInParsedOperands();
732
733auto CompSrcIdx = CompOprIdx -Component::DST_NUM;
734if (CompSrcIdx <getCompParsedSrcOperandsNum())
735returngetIndexOfSrcInParsedOperands(CompSrcIdx);
736
737// The specified operand does not exist.
738return 0;
739}
740
741std::optional<unsigned>InstInfo::getInvalidCompOperandIndex(
742 std::function<unsigned(unsigned,unsigned)> GetRegIdx,bool SkipSrc) const{
743
744auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
745auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
746
747constunsigned CompOprNum =
748 SkipSrc ?Component::DST_NUM :Component::MAX_OPR_NUM;
749unsigned CompOprIdx;
750for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
751unsigned BanksMasks =VOPD_VGPR_BANK_MASKS[CompOprIdx];
752if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
753 ((OpXRegs[CompOprIdx] & BanksMasks) ==
754 (OpYRegs[CompOprIdx] & BanksMasks)))
755return CompOprIdx;
756 }
757
758return {};
759}
760
761// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
762// by the specified component. If an operand is unused
763// or is not a VGPR, the corresponding value is 0.
764//
765// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
766// for the specified component and MC operand. The callback must return 0
767// if the operand is not a register or not a VGPR.
768InstInfo::RegIndices InstInfo::getRegIndices(
769unsigned CompIdx,
770 std::function<unsigned(unsigned,unsigned)> GetRegIdx) const{
771assert(CompIdx <COMPONENTS_NUM);
772
773constauto &Comp = CompInfo[CompIdx];
774InstInfo::RegIndicesRegIndices;
775
776RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
777
778for (unsigned CompOprIdx : {SRC0,SRC1,SRC2}) {
779unsigned CompSrcIdx = CompOprIdx -DST_NUM;
780RegIndices[CompOprIdx] =
781 Comp.hasRegSrcOperand(CompSrcIdx)
782 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
783 : 0;
784 }
785returnRegIndices;
786}
787
788}// namespace VOPD
789
790VOPD::InstInfogetVOPDInstInfo(constMCInstrDesc &OpX,constMCInstrDesc &OpY) {
791returnVOPD::InstInfo(OpX, OpY);
792}
793
794VOPD::InstInfogetVOPDInstInfo(unsigned VOPDOpcode,
795constMCInstrInfo *InstrInfo) {
796auto [OpX, OpY] =getVOPDComponents(VOPDOpcode);
797constauto &OpXDesc = InstrInfo->get(OpX);
798constauto &OpYDesc = InstrInfo->get(OpY);
799VOPD::ComponentInfo OpXInfo(OpXDesc,VOPD::ComponentKind::COMPONENT_X);
800VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
801returnVOPD::InstInfo(OpXInfo, OpYInfo);
802}
803
804namespaceIsaInfo {
805
806AMDGPUTargetID::AMDGPUTargetID(constMCSubtargetInfo &STI)
807 : STI(STI), XnackSetting(TargetIDSetting::Any),
808 SramEccSetting(TargetIDSetting::Any) {
809if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
810 XnackSetting =TargetIDSetting::Unsupported;
811if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
812 SramEccSetting =TargetIDSetting::Unsupported;
813}
814
815voidAMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
816// Check if xnack or sramecc is explicitly enabled or disabled. In the
817// absence of the target features we assume we must generate code that can run
818// in any environment.
819SubtargetFeatures Features(FS);
820 std::optional<bool> XnackRequested;
821 std::optional<bool> SramEccRequested;
822
823for (const std::string &Feature : Features.getFeatures()) {
824if (Feature =="+xnack")
825 XnackRequested =true;
826elseif (Feature =="-xnack")
827 XnackRequested =false;
828elseif (Feature =="+sramecc")
829 SramEccRequested =true;
830elseif (Feature =="-sramecc")
831 SramEccRequested =false;
832 }
833
834bool XnackSupported =isXnackSupported();
835bool SramEccSupported =isSramEccSupported();
836
837if (XnackRequested) {
838if (XnackSupported) {
839 XnackSetting =
840 *XnackRequested ?TargetIDSetting::On :TargetIDSetting::Off;
841 }else {
842// If a specific xnack setting was requested and this GPU does not support
843// xnack emit a warning. Setting will remain set to "Unsupported".
844if (*XnackRequested) {
845errs() <<"warning: xnack 'On' was requested for a processor that does "
846"not support it!\n";
847 }else {
848errs() <<"warning: xnack 'Off' was requested for a processor that "
849"does not support it!\n";
850 }
851 }
852 }
853
854if (SramEccRequested) {
855if (SramEccSupported) {
856 SramEccSetting =
857 *SramEccRequested ?TargetIDSetting::On :TargetIDSetting::Off;
858 }else {
859// If a specific sramecc setting was requested and this GPU does not
860// support sramecc emit a warning. Setting will remain set to
861// "Unsupported".
862if (*SramEccRequested) {
863errs() <<"warning: sramecc 'On' was requested for a processor that "
864"does not support it!\n";
865 }else {
866errs() <<"warning: sramecc 'Off' was requested for a processor that "
867"does not support it!\n";
868 }
869 }
870 }
871}
872
873staticTargetIDSetting
874getTargetIDSettingFromFeatureString(StringRef FeatureString) {
875if (FeatureString.ends_with("-"))
876returnTargetIDSetting::Off;
877if (FeatureString.ends_with("+"))
878returnTargetIDSetting::On;
879
880llvm_unreachable("Malformed feature string");
881}
882
883voidAMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
884SmallVector<StringRef, 3> TargetIDSplit;
885 TargetID.split(TargetIDSplit,':');
886
887for (constauto &FeatureString : TargetIDSplit) {
888if (FeatureString.starts_with("xnack"))
889 XnackSetting =getTargetIDSettingFromFeatureString(FeatureString);
890if (FeatureString.starts_with("sramecc"))
891 SramEccSetting =getTargetIDSettingFromFeatureString(FeatureString);
892 }
893}
894
895std::stringAMDGPUTargetID::toString() const{
896 std::string StringRep;
897raw_string_ostream StreamRep(StringRep);
898
899auto TargetTriple = STI.getTargetTriple();
900autoVersion =getIsaVersion(STI.getCPU());
901
902 StreamRep << TargetTriple.getArchName() <<'-'
903 << TargetTriple.getVendorName() <<'-'
904 << TargetTriple.getOSName() <<'-'
905 << TargetTriple.getEnvironmentName() <<'-';
906
907 std::string Processor;
908// TODO: Following else statement is present here because we used various
909// alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
910// Remove once all aliases are removed from GCNProcessors.td.
911if (Version.Major >= 9)
912 Processor = STI.getCPU().str();
913else
914 Processor = (Twine("gfx") +Twine(Version.Major) +Twine(Version.Minor) +
915Twine(Version.Stepping))
916 .str();
917
918 std::string Features;
919if (STI.getTargetTriple().getOS() ==Triple::AMDHSA) {
920// sramecc.
921if (getSramEccSetting() ==TargetIDSetting::Off)
922 Features +=":sramecc-";
923elseif (getSramEccSetting() ==TargetIDSetting::On)
924 Features +=":sramecc+";
925// xnack.
926if (getXnackSetting() ==TargetIDSetting::Off)
927 Features +=":xnack-";
928elseif (getXnackSetting() ==TargetIDSetting::On)
929 Features +=":xnack+";
930 }
931
932 StreamRep << Processor << Features;
933
934return StringRep;
935}
936
937unsignedgetWavefrontSize(constMCSubtargetInfo *STI) {
938if (STI->getFeatureBits().test(FeatureWavefrontSize16))
939return 16;
940if (STI->getFeatureBits().test(FeatureWavefrontSize32))
941return 32;
942
943return 64;
944}
945
946unsignedgetLocalMemorySize(constMCSubtargetInfo *STI) {
947unsigned BytesPerCU =getAddressableLocalMemorySize(STI);
948
949// "Per CU" really means "per whatever functional block the waves of a
950// workgroup must share". So the effective local memory size is doubled in
951// WGP mode on gfx10.
952if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
953 BytesPerCU *= 2;
954
955return BytesPerCU;
956}
957
958unsignedgetAddressableLocalMemorySize(constMCSubtargetInfo *STI) {
959if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
960return 32768;
961if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
962return 65536;
963if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
964return 163840;
965return 0;
966}
967
968unsignedgetEUsPerCU(constMCSubtargetInfo *STI) {
969// "Per CU" really means "per whatever functional block the waves of a
970// workgroup must share". For gfx10 in CU mode this is the CU, which contains
971// two SIMDs.
972if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
973return 2;
974// Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
975// two CUs, so a total of four SIMDs.
976return 4;
977}
978
979unsignedgetMaxWorkGroupsPerCU(constMCSubtargetInfo *STI,
980unsigned FlatWorkGroupSize) {
981assert(FlatWorkGroupSize != 0);
982if (STI->getTargetTriple().getArch() !=Triple::amdgcn)
983return 8;
984unsigned MaxWaves =getMaxWavesPerEU(STI) *getEUsPerCU(STI);
985unsignedN =getWavesPerWorkGroup(STI, FlatWorkGroupSize);
986if (N == 1) {
987// Single-wave workgroups don't consume barrier resources.
988return MaxWaves;
989 }
990
991unsigned MaxBarriers = 16;
992if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
993 MaxBarriers = 32;
994
995return std::min(MaxWaves /N, MaxBarriers);
996}
997
998unsignedgetMinWavesPerEU(constMCSubtargetInfo *STI) {
999return 1;
1000}
1001
1002unsignedgetMaxWavesPerEU(constMCSubtargetInfo *STI) {
1003// FIXME: Need to take scratch memory into account.
1004if (isGFX90A(*STI))
1005return 8;
1006if (!isGFX10Plus(*STI))
1007return 10;
1008returnhasGFX10_3Insts(*STI) ? 16 : 20;
1009}
1010
1011unsignedgetWavesPerEUForWorkGroup(constMCSubtargetInfo *STI,
1012unsigned FlatWorkGroupSize) {
1013returndivideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1014getEUsPerCU(STI));
1015}
1016
/// \returns the minimum flat workgroup size (always 1).
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

/// \returns the maximum flat workgroup size supported by the compiler.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

/// \returns the number of waves needed to cover \p FlatWorkGroupSize work
/// items, rounding up to whole waves.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}
1030
1031unsignedgetSGPRAllocGranule(constMCSubtargetInfo *STI) {
1032IsaVersionVersion =getIsaVersion(STI->getCPU());
1033if (Version.Major >= 10)
1034returngetAddressableNumSGPRs(STI);
1035if (Version.Major >= 8)
1036return 16;
1037return 8;
1038}
1039
/// \returns the granularity in which SGPR counts are encoded (always 8).
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

/// \returns the total number of physical SGPRs per EU for \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}
1050
1051unsignedgetAddressableNumSGPRs(constMCSubtargetInfo *STI) {
1052if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1053returnFIXED_NUM_SGPRS_FOR_INIT_BUG;
1054
1055IsaVersionVersion =getIsaVersion(STI->getCPU());
1056if (Version.Major >= 10)
1057return 106;
1058if (Version.Major >= 8)
1059return 102;
1060return 104;
1061}
1062
/// \returns the minimum number of SGPRs that must be allocated so that no
/// more than \p WavesPerEU waves fit on an EU; 0 means no minimum.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // gfx10+ does not limit wave occupancy by SGPR usage.
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  // At the maximum wave count there is no lower bound to enforce.
  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  // One more than what would still allow WavesPerEU + 1 waves to fit.
  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
1079
/// \returns the maximum number of SGPRs available per wave when \p WavesPerEU
/// waves must be resident. When \p Addressable is true the result is capped
/// by what the ISA can actually address rather than the raw encoding limit.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  // gfx10+: occupancy does not depend on SGPRs; 108 is the encodable limit.
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  // gfx8/9 can encode up to 112 even though only 102 are addressable.
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
1096
/// \returns the number of additional SGPRs implicitly reserved for VCC,
/// flat-scratch, and XNACK. Note each assignment below is a running maximum
/// (the special registers occupy the same reserved tail), not a sum.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  // On gfx10+ only VCC occupies extra SGPRs.
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}
1121
/// Convenience overload that derives XNACK usage from the subtarget's
/// FeatureXNACK bit.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
1127
/// \returns the number of \p Granule-sized blocks needed to hold \p NumRegs
/// registers; a zero register count still occupies one block.
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}
1138
1139unsignedgetVGPRAllocGranule(constMCSubtargetInfo *STI,
1140 std::optional<bool> EnableWavefrontSize32) {
1141if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1142return 8;
1143
1144bool IsWave32 = EnableWavefrontSize32 ?
1145 *EnableWavefrontSize32 :
1146 STI->getFeatureBits().test(FeatureWavefrontSize32);
1147
1148if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1149return IsWave32 ? 24 : 12;
1150
1151if (hasGFX10_3Insts(*STI))
1152return IsWave32 ? 16 : 8;
1153
1154return IsWave32 ? 8 : 4;
1155}
1156
1157unsignedgetVGPREncodingGranule(constMCSubtargetInfo *STI,
1158 std::optional<bool> EnableWavefrontSize32) {
1159if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1160return 8;
1161
1162bool IsWave32 = EnableWavefrontSize32 ?
1163 *EnableWavefrontSize32 :
1164 STI->getFeatureBits().test(FeatureWavefrontSize32);
1165
1166return IsWave32 ? 8 : 4;
1167}
1168
1169unsignedgetTotalNumVGPRs(constMCSubtargetInfo *STI) {
1170if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1171return 512;
1172if (!isGFX10Plus(*STI))
1173return 256;
1174bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1175if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1176return IsWave32 ? 1536 : 768;
1177return IsWave32 ? 1024 : 512;
1178}
1179
/// \returns the number of addressable architectural VGPRs (always 256).
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

/// \returns the total number of addressable VGPRs; gfx90a exposes 512 via its
/// unified Arch+Acc register file.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return getAddressableNumArchVGPRs(STI);
}
1187
/// Convenience overload that pulls the granule, wave limit and register-file
/// size from \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI),
                                      getMaxWavesPerEU(STI),
                                      getTotalNumVGPRs(STI));
}

/// \returns how many waves fit on an EU when each uses \p NumVGPRs registers,
/// given the allocation \p Granule, the wave limit \p MaxWaves, and the
/// register-file size \p TotalNumVGPRs.
unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  // Less than one granule's worth never limits occupancy.
  if (NumVGPRs < Granule)
    return MaxWaves;
  // Round the request up to whole granules before dividing the file.
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
1203
1204unsignedgetOccupancyWithNumSGPRs(unsigned SGPRs,unsigned MaxWaves,
1205AMDGPUSubtarget::Generation Gen) {
1206if (Gen >=AMDGPUSubtarget::GFX10)
1207return MaxWaves;
1208
1209if (Gen >=AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1210if (SGPRs <= 80)
1211return 10;
1212if (SGPRs <= 88)
1213return 9;
1214if (SGPRs <= 100)
1215return 8;
1216return 7;
1217 }
1218if (SGPRs <= 48)
1219return 10;
1220if (SGPRs <= 56)
1221return 9;
1222if (SGPRs <= 64)
1223return 8;
1224if (SGPRs <= 72)
1225return 7;
1226if (SGPRs <= 80)
1227return 6;
1228return 5;
1229}
1230
/// \returns the minimum number of VGPRs that must be allocated so that no
/// more than \p WavesPerEU waves fit on an EU; 0 means no minimum.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // At (or beyond) the maximum wave count there is no lower bound.
  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  // If this wave count's budget equals the maximum-occupancy budget, any
  // allocation already satisfies the bound.
  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  // If even the full addressable set cannot push occupancy down to
  // WavesPerEU, answer for the lowest occupancy that is reachable.
  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  // One register past what would still allow WavesPerEU + 1 waves.
  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}
1254
1255unsignedgetMaxNumVGPRs(constMCSubtargetInfo *STI,unsigned WavesPerEU) {
1256assert(WavesPerEU != 0);
1257
1258unsigned MaxNumVGPRs =alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1259getVGPRAllocGranule(STI));
1260unsigned AddressableNumVGPRs =getAddressableNumVGPRs(STI);
1261return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1262}
1263
/// \returns the VGPR block count as encoded in program resource registers,
/// i.e. the granulated block count minus 1.
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

/// \returns the number of VGPR allocation blocks actually occupied (not the
/// minus-1 encoded form).
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
}
1277}// end namespace IsaInfo
1278
/// Populates \p KernelCode with the default amd_kernel_code_t values for the
/// subtarget \p STI. Callers overwrite individual fields afterwards.
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  // Machine code begins immediately after the header.
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
  // wavefront_size is log2 of the wave width: 2^5 = 32, 2^6 = 64.
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
    KernelCode.wavefront_size = 5;
    KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
  } else {
    KernelCode.wavefront_size = 6;
  }

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    // WGP_MODE is the inverse of the CU-mode feature bit; MEM_ORDERED is
    // always enabled on gfx10+.
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}
1312
/// \returns true if \p GV lives in the LDS (local) address space.
bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

/// \returns true if \p GV lives in the global address space.
bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

/// \returns true if \p GV lives in a constant (read-only) address space.
bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

/// \returns true for targets (r600) that place constants in the text section.
bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}
1330
1331std::pair<unsigned, unsigned>
1332getIntegerPairAttribute(constFunction &F,StringRefName,
1333 std::pair<unsigned, unsigned>Default,
1334bool OnlyFirstRequired) {
1335if (auto Attr =getIntegerPairAttribute(F,Name, OnlyFirstRequired))
1336return {Attr->first, Attr->second ? *(Attr->second) :Default.second};
1337returnDefault;
1338}
1339
/// Parses the string attribute \p Name of the form "<int>[,<int>]".
/// \returns std::nullopt (after emitting a context error for malformed input)
/// when the attribute is absent or unparsable; otherwise the first integer
/// and, if present, the second.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return std::nullopt;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, std::optional<unsigned>> Ints;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return std::nullopt;
  }
  unsigned Second = 0;
  if (Strs.second.trim().getAsInteger(0, Second)) {
    // A missing second value is only an error when it is required; malformed
    // (non-empty, non-numeric) text is always an error.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return std::nullopt;
    }
  } else {
    Ints.second = Second;
  }

  return Ints;
}
1366
/// Parses the string attribute \p Name as a comma-separated list of exactly
/// \p Size integers. \returns the parsed values, or \p Size copies of
/// \p DefaultVal (after emitting a context error for malformed input) when
/// the attribute is absent or invalid.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal) {
  assert(Size > 2);
  SmallVector<unsigned> Default(Size, DefaultVal);

  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  SmallVector<unsigned> Vals(Size, DefaultVal);

  LLVMContext &Ctx = F.getContext();

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return Default;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  // Both leftover text (too many values) and an early-exhausted string (too
  // few values) are errors.
  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return Default;
  }
  return Vals;
}
1403
/// \returns the all-ones mask for the (split lo/hi) VMCNT field of s_waitcnt.
unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

/// \returns the all-ones mask for the LOADCNT field.
unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the SAMPLECNT field.
unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the BVHCNT field.
unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the EXPCNT field.
unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the LGKMCNT field.
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the DSCNT field.
unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the KMCNT field.
unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

/// \returns the all-ones mask for the STORECNT field.
unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}
1441
/// \returns the mask covering every defined field of the legacy s_waitcnt
/// encoding (vmcnt lo/hi, expcnt, lgkmcnt) in their encoded positions.
unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}
1453
/// Extracts VMCNT from an encoded s_waitcnt value; the field is stored as a
/// split lo/hi pair and reassembled here.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

/// Extracts EXPCNT from an encoded s_waitcnt value.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

/// Extracts LGKMCNT from an encoded s_waitcnt value.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

/// Decodes all three legacy s_waitcnt fields into the out parameters.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

/// Decodes \p Encoded into a Waitcnt struct (vmcnt -> LoadCnt,
/// lgkmcnt -> DsCnt in the unified naming).
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}
1486
/// Packs \p Vmcnt into \p Waitcnt, splitting it across the lo and hi parts of
/// the field. \returns the updated encoding.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

/// Packs \p Expcnt into \p Waitcnt. \returns the updated encoding.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

/// Packs \p Lgkmcnt into \p Waitcnt. \returns the updated encoding.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

/// Builds a complete s_waitcnt encoding from the three counts. Starts from
/// the all-ones mask so undefined bits stay set ("no wait").
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

/// Encodes a Waitcnt struct (LoadCnt -> vmcnt, DsCnt -> lgkmcnt).
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}
1520
1521staticunsignedgetCombinedCountBitMask(constIsaVersion &Version,
1522bool IsStore) {
1523unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1524 getDscntBitWidth(Version.Major));
1525if (IsStore) {
1526unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1527 getStorecntBitWidth(Version.Major));
1528return Dscnt | Storecnt;
1529 }
1530unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1531 getLoadcntBitWidth(Version.Major));
1532return Dscnt | Loadcnt;
1533}
1534
/// Decodes a combined s_wait_loadcnt_dscnt immediate into its two fields.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

/// Decodes a combined s_wait_storecnt_dscnt immediate into its two fields.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}
1554
/// Packs \p Loadcnt into the shared transfer field of \p Waitcnt.
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

/// Packs \p Storecnt into the shared transfer field of \p Waitcnt.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

/// Packs \p Dscnt into its field of \p Waitcnt.
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

/// Builds a combined loadcnt/dscnt immediate, starting from the all-ones
/// field mask so untouched bits read as "no wait".
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

/// Encodes the LoadCnt/DsCnt members of \p Decoded.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

/// Builds a combined storecnt/dscnt immediate (see encodeLoadcntDscnt).
static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

/// Encodes the StoreCnt/DsCnt members of \p Decoded.
unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}
1597
1598//===----------------------------------------------------------------------===//
1599// Custom Operand Values
1600//===----------------------------------------------------------------------===//
1601
1602staticunsignedgetDefaultCustomOperandEncoding(constCustomOperandVal *Opr,
1603intSize,
1604constMCSubtargetInfo &STI) {
1605unsigned Enc = 0;
1606for (intIdx = 0;Idx <Size; ++Idx) {
1607constauto &Op = Opr[Idx];
1608if (Op.isSupported(STI))
1609 Enc |=Op.encode(Op.Default);
1610 }
1611return Enc;
1612}
1613
/// \returns true if \p Code can be printed symbolically: every supported
/// field holds a valid value and no bits fall outside the supported fields.
/// Sets \p HasNonDefaultVal if any field differs from its default.
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  // Reject encodings with stray bits outside any supported field.
  return (Code & ~UsedOprMask) == 0;
}
1632
/// Iterator-style decode: starting from \p Idx, finds the next field
/// supported by \p STI, reports its name/value/default-ness, and advances
/// \p Idx past it. \returns false once all fields are exhausted.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}
1649
1650staticintencodeCustomOperandVal(constCustomOperandVal &Op,
1651 int64_t InputVal) {
1652if (InputVal < 0 || InputVal >Op.Max)
1653returnOPR_VAL_INVALID;
1654returnOp.encode(InputVal);
1655}
1656
/// Encodes the field named \p Name with \p InputVal, tracking already-used
/// fields via \p UsedOprMask. \returns the encoded value, OPR_ID_DUPLICATE if
/// the field was already set, OPR_ID_UNSUPPORTED if it exists but is not
/// supported by \p STI, or OPR_ID_UNKNOWN if no field has that name.
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        // Remember that the name exists but keep scanning: another entry with
        // the same name may be supported on this subtarget.
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}
1678
1679//===----------------------------------------------------------------------===//
1680// DepCtr
1681//===----------------------------------------------------------------------===//
1682
1683namespaceDepCtr {
1684
1685intgetDefaultDepCtrEncoding(constMCSubtargetInfo &STI) {
1686staticintDefault = -1;
1687if (Default == -1)
1688Default =getDefaultCustomOperandEncoding(DepCtrInfo,DEP_CTR_SIZE, STI);
1689returnDefault;
1690}
1691
/// \returns true if \p Code can be printed as symbolic depctr fields; sets
/// \p HasNonDefaultVal if any field differs from its default.
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

/// Iterator-style decode of the next supported depctr field (see
/// decodeCustomOperand).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

/// Encodes the depctr field \p Name with \p Val (see encodeCustomOperand for
/// the error return codes).
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}
1709
/// \returns the VM_VSRC field of an encoded depctr value.
unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

/// \returns the VA_VDST field of an encoded depctr value.
unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

/// \returns the SA_SDST field of an encoded depctr value.
unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

/// Packs \p VmVsrc into \p Encoded. \returns the updated encoding.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

/// Packs \p VmVsrc into an otherwise all-ones ("no wait") encoding.
unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

/// Packs \p VaVdst into \p Encoded. \returns the updated encoding.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

/// Packs \p VaVdst into an otherwise all-ones ("no wait") encoding.
unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

/// Packs \p SaSdst into \p Encoded. \returns the updated encoding.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

/// Packs \p SaSdst into an otherwise all-ones ("no wait") encoding.
unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}
1745
1746}// namespace DepCtr
1747
1748//===----------------------------------------------------------------------===//
1749// exp tgt
1750//===----------------------------------------------------------------------===//
1751
1752namespaceExp {
1753
/// Describes one family of export targets: a symbolic base name, the hardware
/// id of the family's first member, and the largest index that may be
/// appended to the name (0 means the name takes no index).
struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
    {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
    {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
    {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
    {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};
1769
1770boolgetTgtName(unsigned Id,StringRef &Name,int &Index) {
1771for (constExpTgt &Val :ExpTgtInfo) {
1772if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1773Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1774Name = Val.Name;
1775returntrue;
1776 }
1777 }
1778returnfalse;
1779}
1780
/// Parses a symbolic export-target name ("mrt0", "pos3", "null", ...) back
/// into its hardware id. \returns ET_INVALID for unknown names, out-of-range
/// indices, or indices written with leading zeroes.
unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    // Unindexed targets must match exactly.
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    // Indexed targets are the family name followed by a decimal index.
    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disable leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
1803
/// \returns true if export target \p Id exists on the subtarget \p STI.
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    // The null target was removed in gfx11.
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    // pos4 and prim exports were introduced in gfx10.
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    // Dual-source-blend targets were introduced in gfx11.
    return isGFX11Plus(STI);
  default:
    // param exports were removed in gfx11; everything else is universal.
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}
1820
1821}// namespace Exp
1822
1823//===----------------------------------------------------------------------===//
1824// MTBUF Format
1825//===----------------------------------------------------------------------===//
1826
1827namespaceMTBUFFormat {
1828
/// \returns the DFMT id for symbolic name \p Name, or DFMT_UNDEF if unknown.
int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

/// \returns the symbolic name of data format \p Id.
StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}
1841
/// Picks the numeric-format name table matching the subtarget's generation.
static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

/// \returns the NFMT id for symbolic name \p Name on subtarget \p STI, or
/// NFMT_UNDEF if unknown.
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  const auto *lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

/// \returns the symbolic name of numeric format \p Id on subtarget \p STI
/// (may be empty for ids that generation does not define).
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}
1863
/// \returns true if the combined dfmt/nfmt encoding \p Id carries a numeric
/// format this subtarget defines.
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

/// \returns true if \p Id names a numeric format on this subtarget.
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

/// Packs \p Dfmt and \p Nfmt into the combined format field.
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

/// Splits the combined format field \p Format into \p Dfmt and \p Nfmt.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
1883
/// \returns the unified-format id for symbolic name \p Name using the table
/// for \p STI's generation, or UFMT_UNDEF if unknown.
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

/// \returns the symbolic name of unified format \p Id, or "" if it is not
/// valid for \p STI's generation.
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

/// \returns true if \p Id is within the unified-format range of \p STI's
/// generation.
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}
1908
/// Maps a (dfmt, nfmt) pair to the equivalent unified format id by searching
/// the generation's conversion table. \returns UFMT_UNDEF when the pair has
/// no unified equivalent.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

/// \returns true if \p Val is an encodable MTBUF format for \p STI (unified
/// on gfx10+, combined dfmt/nfmt before that).
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

/// \returns the default MTBUF format encoding for \p STI.
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}
1935
1936}// namespace MTBUFFormat
1937
1938//===----------------------------------------------------------------------===//
1939// SendMsg
1940//===----------------------------------------------------------------------===//
1941
1942namespaceSendMsg {
1943
/// \returns the bit mask covering the message-id field, whose width changed
/// in gfx11.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

/// \returns true if \p MsgId fits entirely within the message-id field.
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}
1951
/// \returns true if \p OpId is a valid operation for message \p MsgId. In
/// non-strict mode only the field width is checked; in strict mode the
/// operation must be one the message actually defines.
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (msgRequiresOp(MsgId, STI)) {
    // GS_NOP is carried by MSG_GS_DONE, not MSG_GS, pre-gfx11.
    if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
      return false;

    // A named op exists iff the (msg, op) pair is defined.
    return !getMsgOpName(MsgId, OpId, STI).empty();
  }

  return OpId == OP_NONE_;
}
1968
/// \returns true if \p StreamId is valid for the (\p MsgId, \p OpId) pair.
/// Non-strict mode only checks the field width; strict mode requires a
/// stream id exactly where the message supports one.
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
    case ID_GS_DONE_PreGFX11:
      // GS_DONE takes a stream only for non-NOP operations.
      return (OpId == OP_GS_NOP) ?
             (StreamId == STREAM_ID_NONE_) :
             (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  // All other messages (and everything on gfx11+) take no stream id.
  return StreamId == STREAM_ID_NONE_;
}
1988
/// \returns true if message \p MsgId carries a mandatory operation field.
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
         (!isGFX11Plus(STI) &&
          (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

/// \returns true if the (\p MsgId, \p OpId) pair carries a stream id
/// (pre-gfx11 GS/GS_DONE, excluding GS_NOP).
bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
         (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
         OpId != OP_GS_NOP;
}
2001
2002voiddecodeMsg(unsigned Val,uint16_t &MsgId,uint16_t &OpId,
2003uint16_t &StreamId,constMCSubtargetInfo &STI) {
2004 MsgId = Val &getMsgIdMask(STI);
2005if (isGFX11Plus(STI)) {
2006 OpId = 0;
2007StreamId = 0;
2008 }else {
2009 OpId = (Val &OP_MASK_) >>OP_SHIFT_;
2010StreamId = (Val &STREAM_ID_MASK_) >>STREAM_ID_SHIFT_;
2011 }
2012}
2013
// Packs message-id, operation, and stream-id fields into a single s_sendmsg
// immediate. The caller is responsible for field validity (see the
// isValidMsg* helpers above).
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
2019
2020}// namespace SendMsg
2021
2022//===----------------------------------------------------------------------===//
2023//
2024//===----------------------------------------------------------------------===//
2025
// Reads the "InitialPSInputAddr" function attribute as an integer; 0 when
// absent.
unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

// Whether the function exports color. As a safe default always respond as if
// PS has color exports (i.e. the default is 1 for AMDGPU_PS, 0 otherwise).
bool getHasColorExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

// Whether the function exports depth, per the "amdgpu-depth-export"
// attribute; defaults to false when absent.
bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}
2040
// Returns true for all AMDGPU shader calling conventions (graphics stages
// plus compute-shader variants); kernels (AMDGPU_KERNEL/SPIR_KERNEL) are not
// shaders.
bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}
2057
// Graphics = any shader convention, plus the AMDGPU_Gfx callable convention.
bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

// Everything that is not graphics counts as compute; AMDGPU_CS is both a
// shader (graphics per isShader) and a compute convention, hence the extra
// disjunct.
bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}
2065
2066boolisEntryFunctionCC(CallingConv::IDCC) {
2067switch (CC) {
2068caseCallingConv::AMDGPU_KERNEL:
2069caseCallingConv::SPIR_KERNEL:
2070caseCallingConv::AMDGPU_VS:
2071caseCallingConv::AMDGPU_GS:
2072caseCallingConv::AMDGPU_PS:
2073caseCallingConv::AMDGPU_CS:
2074caseCallingConv::AMDGPU_ES:
2075caseCallingConv::AMDGPU_HS:
2076caseCallingConv::AMDGPU_LS:
2077returntrue;
2078default:
2079returnfalse;
2080 }
2081}
2082
2083boolisModuleEntryFunctionCC(CallingConv::IDCC) {
2084switch (CC) {
2085caseCallingConv::AMDGPU_Gfx:
2086returntrue;
2087default:
2088returnisEntryFunctionCC(CC) ||isChainCC(CC);
2089 }
2090}
2091
2092boolisChainCC(CallingConv::IDCC) {
2093switch (CC) {
2094caseCallingConv::AMDGPU_CS_Chain:
2095caseCallingConv::AMDGPU_CS_ChainPreserve:
2096returntrue;
2097default:
2098returnfalse;
2099 }
2100}
2101
// NOTE(review): despite the name, this tests for *module entry* conventions,
// which is broader than just kernels (it also accepts AMDGPU_Gfx and chain
// functions) — presumably intentional; confirm against callers.
bool isKernelCC(const Function *Func) {
  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}
2105
// Simple subtarget-feature queries.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureXNACK);
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSRAMECC);
}

// MIMG R128 addressing is only usable when the R128A16 feature is absent.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
         !STI.hasFeature(AMDGPU::FeatureR128A16);
}

bool hasA16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureA16);
}

bool hasG16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureG16);
}

// Packed D16 memory ops exist on VI+ subtargets that don't use unpacked D16.
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
         !isSI(STI);
}

bool hasGDS(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGDS);
}
2134
// Maximum number of addresses an NSA (non-sequential address) MIMG encoding
// can carry for the given subtarget; 0 when NSA is unsupported (pre-GFX10).
// HasSampler only affects the limit on GFX12+.
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
  auto Version = getIsaVersion(STI.getCPU());
  if (Version.Major == 10)
    return Version.Minor >= 3 ? 13 : 5;
  if (Version.Major == 11)
    return 5;
  if (Version.Major >= 12)
    return HasSampler ? 4 : 5;
  return 0;
}

// Currently 16 for all subtargets; the parameter exists for future variance.
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2147
// Generation predicates: one feature query per hardware generation, plus
// composite helpers expressed in terms of them.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSeaIslands);
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX9);
}

bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI);
}

bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
}

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9(STI) || isGFX10(STI);
}

bool isGFX8Plus(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9Plus(STI);
}

bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10);
}

bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
  return isGFX10(STI) || isGFX11(STI);
}

bool isGFX10Plus(const MCSubtargetInfo &STI) {
  return isGFX10(STI) || isGFX11Plus(STI);
}

bool isGFX11(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool isGFX11Plus(const MCSubtargetInfo &STI) {
  return isGFX11(STI) || isGFX12Plus(STI);
}
2205
2206boolisGFX12(constMCSubtargetInfo &STI) {
2207return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2208}
2209
// GFX12 is currently the newest generation, so GFX12+ == GFX12.
bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }

bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }

bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI);
}

// Spelled out as "any generation before GFX10" rather than !isGFX10Plus().
bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
  return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
}

// GFX10.3 introduced the GFX10_B encoding, so "GFX10 without B encoding"
// selects gfx1010..gfx102x.
bool isGFX10Before1030(const MCSubtargetInfo &STI) {
  return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
}

bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
}

bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
}

bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
}

// GFX10.3 through GFX11: has the B encoding but predates GFX12.
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
  return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
}

bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool isGFX940(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
}

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool hasMAIInsts(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureMAIInsts);
}

bool hasVOPD(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVOPD);
}

bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
}

// NOTE(review): returns unsigned rather than bool — presumably to match the
// declaration in the header; callers treat the result as a boolean.
unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureKernargPreload);
}
2273
2274int32_tgetTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2275 int32_t ArgNumVGPR) {
2276if (has90AInsts && ArgNumAGPR)
2277returnalignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2278return std::max(ArgNumVGPR, ArgNumAGPR);
2279}
2280
// Whether \p Reg is (or contains) a scalar register. Wide registers are
// tested via their first 32-bit subregister; SCC also counts as scalar.
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

// Whether \p Reg is a high-16-bit half register, per its encoding flag.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
  return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
}
2291
// Shared switch body mapping between pseudo registers and their per-subtarget
// real counterparts. The CASE_* macros are defined differently before each
// expansion so the same table maps in both directions (pseudo -> MC below,
// MC -> pseudo further down).
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg.id()) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_GFXPRE11_GFX11PLUS(M0) \
  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }

// Direction 1: pseudo register -> subtarget-specific MC register.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

// Maps a pseudo register to the real register for \p STI; unmapped registers
// are returned unchanged. R600 has no such mapping.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

// Direction 2: subtarget-specific MC register -> pseudo register.
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2366
2367boolisInlineValue(unsignedReg) {
2368switch (Reg) {
2369case AMDGPU::SRC_SHARED_BASE_LO:
2370case AMDGPU::SRC_SHARED_BASE:
2371case AMDGPU::SRC_SHARED_LIMIT_LO:
2372case AMDGPU::SRC_SHARED_LIMIT:
2373case AMDGPU::SRC_PRIVATE_BASE_LO:
2374case AMDGPU::SRC_PRIVATE_BASE:
2375case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2376case AMDGPU::SRC_PRIVATE_LIMIT:
2377case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2378returntrue;
2379case AMDGPU::SRC_VCCZ:
2380case AMDGPU::SRC_EXECZ:
2381case AMDGPU::SRC_SCC:
2382returntrue;
2383case AMDGPU::SGPR_NULL:
2384returntrue;
2385default:
2386returnfalse;
2387 }
2388}
2389
2390#undef CASE_CI_VI
2391#undef CASE_VI_GFX9PLUS
2392#undef CASE_GFXPRE11_GFX11PLUS
2393#undef CASE_GFXPRE11_GFX11PLUS_TO
2394#undef MAP_REG2REG
2395
// Whether operand \p OpNo of \p Desc is a source operand (register or
// immediate), per the OPERAND_SRC_* operand-type range.
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

// Whether operand \p OpNo is a KImm (32-bit literal-only) operand.
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
         OpType <= AMDGPU::OPERAND_KIMM_LAST;
}

// Whether operand \p OpNo is a floating-point source operand (any of the
// FP-typed immediate/inline operand kinds).
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

// Whether operand \p OpNo only accepts inline constants (the INLINE_C and
// INLINE_AC operand-type ranges).
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
         (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
          OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
}
2444
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
// Maps a register-class id to its width in bits; exhaustive over the
// register classes this backend queries, asserts on anything else.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
  case AMDGPU::SReg_128_XNULLRegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
  case AMDGPU::SReg_256_XNULLRegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

// Convenience overload taking the class object rather than its id.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

// Size in bytes of the register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.operands()[OpNo].RegClass;
  return getRegBitWidth(RCID) / 8;
}
2598
// Whether a 64-bit literal can be encoded as an inline constant: a small
// integer (-16..64) or one of the hardware's fixed FP64 values; inv(2*pi)
// only when the subtarget supports it (HasInv2Pi).
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
         (Val == llvm::bit_cast<uint64_t>(1.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
         (Val == llvm::bit_cast<uint64_t>(0.5)) ||
         (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
         (Val == llvm::bit_cast<uint64_t>(2.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
         (Val == llvm::bit_cast<uint64_t>(4.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // double 1.0/(2.0*pi)
}

// 32-bit analogue of the above.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
         (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
         (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // float 1.0/(2.0*pi)
}
2641
// 16-bit bfloat inline-constant check. Subtargets without the Inv2Pi
// feature have no 16-bit inline constants at all, hence the early false.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;
  if (isInlinableIntLiteral(Literal))
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3F00 || // 0.5
         Val == 0xBF00 || // -0.5
         Val == 0x3F80 || // 1.0
         Val == 0xBF80 || // -1.0
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4080 || // 4.0
         Val == 0xC080 || // -4.0
         Val == 0x3E22;   // 1.0 / (2.0 * pi)
}

// 16-bit integer operands use the 32-bit inline-constant rules.
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
  return isInlinableLiteral32(Literal, HasInv2Pi);
}

// 16-bit IEEE-half inline-constant check; same structure as the bf16 case
// but with half-precision bit patterns.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;
  if (isInlinableIntLiteral(Literal))
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}
2679
// Returns the inline-constant encoding for a literal used by a (packed)
// 16-bit instruction, or nullopt if the literal is not inlinable.
//
// Unfortunately, the Instruction Set Architecture Reference Guide is
// misleading about how the inline operands work for (packed) 16-bit
// instructions. In a nutshell, the actual HW behavior is:
//
//  - integer encodings (-16 .. 64) are always produced as sign-extended
//    32-bit values
//  - float encodings are produced as:
//    - for F16 instructions: corresponding half-precision float values in
//      the LSBs, 0 in the MSBs
//    - for UI16 instructions: corresponding single-precision float value
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  const int32_t SVal = static_cast<int32_t>(Literal);

  // Integer encodings: 0..64 map to 128..192, -1..-16 map to 193..208.
  if (SVal >= 0 && SVal <= 64)
    return 128 + SVal;
  if (SVal >= -16 && SVal <= -1)
    return 192 - SVal;

  // Float encodings 240..248, in order:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0/(2.0*pi).
  static const uint32_t HalfBits[9] = {0x3800, 0xB800, 0x3C00,
                                       0xBC00, 0x4000, 0xC000,
                                       0x4400, 0xC400, 0x3118};
  static const uint32_t SingleBits[9] = {0x3F000000, 0xBF000000, 0x3F800000,
                                         0xBF800000, 0x40000000, 0xC0000000,
                                         0x40800000, 0xC0800000, 0x3E22F983};

  const uint32_t *Table = IsFloat ? HalfBits : SingleBits;
  for (unsigned I = 0; I != 9; ++I)
    if (Literal == Table[I])
      return 240 + I;

  return {};
}
2732
// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
  // Integer variant: float patterns are matched as single-precision bits.
  return getInlineEncodingV216(false, Literal);
}
2738
// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
// or nullopt. Same scheme as getInlineEncodingV216, but the float patterns
// are bfloat16 bit patterns.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t SVal = static_cast<int32_t>(Literal);

  // Integer encodings: 0..64 map to 128..192, -1..-16 map to 193..208.
  if (SVal >= 0 && SVal <= 64)
    return 128 + SVal;
  if (SVal >= -16 && SVal <= -1)
    return 192 - SVal;

  // Float encodings 240..248, in order:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0/(2.0*pi).
  static const uint32_t BF16Bits[9] = {0x3F00, 0xBF00, 0x3F80,
                                       0xBF80, 0x4000, 0xC000,
                                       0x4080, 0xC080, 0x3E22};
  for (unsigned I = 0; I != 9; ++I)
    if (Literal == BF16Bits[I])
      return 240 + I;

  return std::nullopt;
}
2766
// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  // Float variant: float patterns are matched as half-precision bits.
  return getInlineEncodingV216(true, Literal);
}
2772
// Whether the given literal can be inlined for a V_PK_* instruction.
// Dispatches on the packed operand type to the matching encoding helper;
// non-packed types are a caller error.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return getInlineEncodingV216(false, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    return getInlineEncodingV216(true, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
    return isInlinableLiteralV2BF16(Literal);
  default:
    llvm_unreachable("bad packed operand type");
  }
}

// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
bool isInlinableLiteralV2I16(uint32_t Literal) {
  return getInlineEncodingV2I16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
bool isInlinableLiteralV2BF16(uint32_t Literal) {
  return getInlineEncodingV2BF16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
bool isInlinableLiteralV2F16(uint32_t Literal) {
  return getInlineEncodingV2F16(Literal).has_value();
}
2807
// Whether \p Val can be carried in a 32-bit literal slot. For FP64 operands
// the literal holds only the high half, so the low 32 bits must be zero;
// otherwise any value representable in 32 bits (signed or unsigned) is fine.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
  if (IsFP64)
    return !(Val & 0xffffffffu);

  return isUInt<32>(Val) || isInt<32>(Val);
}
2814
// Whether function argument \p A is passed in an SGPR (and is therefore
// uniform), based on the calling convention and parameter attributes.
bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    // Kernel arguments always come in via scalar registers / kernarg memory.
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}

// Call-site variant of the above: same convention logic, but queries the
// parameter attributes on the call instead of the callee's arguments.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}
2869
// Whether SMEM immediate offsets are byte-granular (GCN3+/GFX10+) rather
// than dword-granular.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

// Range check for an already-converted unsigned SMRD immediate offset:
// 23 bits on GFX12+, 20 bits with byte offsets, 8 bits (dwords) otherwise.
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

// Range check for a signed SMRD immediate offset; buffer accesses never use
// the signed form before GFX12.
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  if (isGFX12Plus(ST))
    return isInt<24>(EncodedOffset);

  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}
2893
// True iff \p ByteOffset is a whole number of dwords (multiple of 4 bytes).
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
2897
// Converts a byte offset into the units the subtarget's SMRD encoding uses:
// bytes where supported, dwords otherwise (requiring dword alignment).
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

// Returns the encoded SMRD immediate for \p ByteOffset, or nullopt when the
// offset cannot be encoded on this subtarget.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
  // Handle case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24 bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  // Dword-granular encodings can only express dword-aligned offsets.
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
2935
// CI-only fallback: encode an SMRD offset as a 32-bit literal (dword units);
// nullopt elsewhere or when not dword-aligned.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

// Width of the FLAT-instruction immediate offset field per generation.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (AMDGPU::isGFX10(ST))
    return 12;

  if (AMDGPU::isGFX12(ST))
    return 24;
  return 13;
}
2954
namespace {

// Keys for the tablegen-generated searchable tables included below; the
// lookup* functions return nullptr when the intrinsic is not in the table.
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

// Table membership (non-null lookup) is the answer in both queries below.
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}
2984
// Looks up a buffer-format descriptor by its components, using the table for
// the subtarget's generation (GFX9 / GFX10 / GFX11+).
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI)
             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
                                                       NumComponents, NumFormat)
                            : getGfx9BufferFormatInfo(BitsPerComp,
                                                      NumComponents, NumFormat);
}

// Same lookup keyed by the raw format value.
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
                          : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
                                         : getGfx9BufferFormatInfo(Format);
}
3004
// Whether any of vdst/src0/src1/src2 of \p OpDesc uses a 64-bit VGPR
// register class.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
  for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
                       OpName::src2 }) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
        OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

// DPP "double" ALU classification: currently exactly the instructions with a
// 64-bit VGPR operand.
bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
  return hasAny64BitVGPROperands(OpDesc);
}

// LDS allocation granularity in dwords.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  // Currently this is 128 for all subtargets
  return 128;
}
3028
3029}// namespace AMDGPU
3030
3031raw_ostream &operator<<(raw_ostream &OS,
3032constAMDGPU::IsaInfo::TargetIDSetting S) {
3033switch (S) {
3034case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3035OS <<"Unsupported";
3036break;
3037case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3038OS <<"Any";
3039break;
3040case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3041OS <<"Off";
3042break;
3043case (AMDGPU::IsaInfo::TargetIDSetting::On):
3044OS <<"On";
3045break;
3046 }
3047returnOS;
3048}
3049
3050}// namespace llvm
MRI
unsigned const MachineRegisterInfo * MRI
Definition:AArch64AdvSIMDScalarPass.cpp:105
AMDGPUAsmUtils.h
MAP_REG2REG
#define MAP_REG2REG
Definition:AMDGPUBaseInfo.cpp:2292
Intr
unsigned Intr
Definition:AMDGPUBaseInfo.cpp:2958
DefaultAMDHSACodeObjectVersion
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
AMDGPUBaseInfo.h
AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.
AMDGPU.h
AMDKernelCodeTUtils.h
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
AMDKernelCodeT.h
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
Definition:AMDKernelCodeT.h:127
Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...
ELF.h
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Info
Analysis containing CSE Info
Definition:CSEInfo.cpp:27
CommandLine.h
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
Name
std::string Name
Definition:ELFObjHandler.cpp:77
Index
uint32_t Index
Definition:ELFObjHandler.cpp:83
Size
uint64_t Size
Definition:ELFObjHandler.cpp:81
GlobalValue.h
Function.h
LLVMContext.h
MCInstrInfo.h
MCRegisterInfo.h
MCSubtargetInfo.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
TRI
unsigned const TargetRegisterInfo * TRI
Definition:MachineSink.cpp:2029
Reg
unsigned Reg
Definition:MachineSink.cpp:2028
Signed
@ Signed
Definition:NVPTXISelLowering.cpp:4789
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
S_00B848_MEM_ORDERED
#define S_00B848_MEM_ORDERED(x)
Definition:SIDefines.h:1193
S_00B848_WGP_MODE
#define S_00B848_WGP_MODE(x)
Definition:SIDefines.h:1190
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
DefaultVal
unsigned unsigned DefaultVal
Definition:SPIRVModuleAnalysis.cpp:64
OS
raw_pwrite_stream & OS
Definition:SampleProfWriter.cpp:51
StringExtras.h
This file contains some functions that are useful when dealing with strings.
TargetParser.h
T
llvm::AMDGPUSubtarget::Generation
Generation
Definition:AMDGPUSubtarget.h:31
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition:AMDGPUSubtarget.h:41
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition:AMDGPUSubtarget.h:39
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccSupported
bool isSramEccSupported() const
Definition:AMDGPUBaseInfo.h:180
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setTargetIDFromFeaturesString
void setTargetIDFromFeaturesString(StringRef FS)
Definition:AMDGPUBaseInfo.cpp:815
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::getXnackSetting
TargetIDSetting getXnackSetting() const
Definition:AMDGPUBaseInfo.h:170
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::AMDGPUTargetID
AMDGPUTargetID(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:806
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackSupported
bool isXnackSupported() const
Definition:AMDGPUBaseInfo.h:151
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setTargetIDFromTargetIDStream
void setTargetIDFromTargetIDStream(StringRef TargetID)
Definition:AMDGPUBaseInfo.cpp:883
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::toString
std::string toString() const
Definition:AMDGPUBaseInfo.cpp:895
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::getSramEccSetting
TargetIDSetting getSramEccSetting() const
Definition:AMDGPUBaseInfo.h:199
llvm::AMDGPU::VOPD::ComponentInfo
Definition:AMDGPUBaseInfo.h:811
llvm::AMDGPU::VOPD::ComponentInfo::getIndexInParsedOperands
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
Definition:AMDGPUBaseInfo.cpp:727
llvm::AMDGPU::VOPD::ComponentLayout::getIndexOfDstInParsedOperands
unsigned getIndexOfDstInParsedOperands() const
Definition:AMDGPUBaseInfo.h:791
llvm::AMDGPU::VOPD::ComponentLayout::getIndexOfSrcInParsedOperands
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
Definition:AMDGPUBaseInfo.h:796
llvm::AMDGPU::VOPD::ComponentProps::ComponentProps
ComponentProps()=default
llvm::AMDGPU::VOPD::ComponentProps::getCompParsedSrcOperandsNum
unsigned getCompParsedSrcOperandsNum() const
Definition:AMDGPUBaseInfo.h:677
llvm::AMDGPU::VOPD::InstInfo
Definition:AMDGPUBaseInfo.h:828
llvm::AMDGPU::VOPD::InstInfo::getInvalidCompOperandIndex
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const
Definition:AMDGPUBaseInfo.cpp:741
llvm::AMDGPU::VOPD::InstInfo::RegIndices
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
Definition:AMDGPUBaseInfo.h:833
llvm::Any
Definition:Any.h:28
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition:Argument.h:31
llvm::Attribute
Definition:Attributes.h:67
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition:InstrTypes.h:1112
llvm::CallBase::getCallingConv
CallingConv::ID getCallingConv() const
Definition:InstrTypes.h:1399
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition:Instructions.cpp:409
llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition:DWARFExpression.h:32
llvm::FeatureBitset::test
constexpr bool test(unsigned I) const
Definition:SubtargetFeature.h:82
llvm::Function
Definition:Function.h:63
llvm::GlobalValue
Definition:GlobalValue.h:48
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition:GlobalValue.h:206
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition:LLVMContext.h:67
llvm::LLVMContext::emitError
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Definition:LLVMContext.cpp:210
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition:MCInstrDesc.h:198
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition:MCInstrDesc.h:237
llvm::MCInstrDesc::operands
ArrayRef< MCOperandInfo > operands() const
Definition:MCInstrDesc.h:239
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition:MCInstrDesc.h:248
llvm::MCInstrDesc::getOperandConstraint
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition:MCInstrDesc.h:219
llvm::MCInstrDesc::getOpcode
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition:MCInstrDesc.h:230
llvm::MCInstrInfo
Interface to description of machine instruction set.
Definition:MCInstrInfo.h:26
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition:MCInstrInfo.h:63
llvm::MCRegisterClass
MCRegisterClass - Base class of TargetRegisterClass.
Definition:MCRegisterInfo.h:35
llvm::MCRegisterClass::getID
unsigned getID() const
getID() - Return the register class ID number.
Definition:MCRegisterInfo.h:53
llvm::MCRegisterClass::contains
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
Definition:MCRegisterInfo.h:73
llvm::MCRegisterInfo
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Definition:MCRegisterInfo.h:149
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition:MCRegister.h:33
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition:MCSubtargetInfo.h:76
llvm::MCSubtargetInfo::hasFeature
bool hasFeature(unsigned Feature) const
Definition:MCSubtargetInfo.h:121
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition:MCSubtargetInfo.h:110
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition:MCSubtargetInfo.h:114
llvm::MCSubtargetInfo::getCPU
StringRef getCPU() const
Definition:MCSubtargetInfo.h:111
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition:Module.h:65
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::StringLiteral
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition:StringRef.h:853
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::split
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition:StringRef.h:700
llvm::StringRef::getAsInteger
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition:StringRef.h:470
llvm::StringRef::str
std::string str() const
str - Get the contents as an std::string.
Definition:StringRef.h:229
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition:StringRef.h:147
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition:StringRef.h:150
llvm::StringRef::ends_with
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition:StringRef.h:277
llvm::SubtargetFeatures
Manages the enabling and disabling of subtarget specific features.
Definition:SubtargetFeature.h:174
llvm::SubtargetFeatures::getFeatures
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Definition:SubtargetFeature.h:189
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition:Triple.h:44
llvm::Triple::AMDHSA
@ AMDHSA
Definition:Triple.h:223
llvm::Triple::r600
@ r600
Definition:Triple.h:73
llvm::Triple::amdgcn
@ amdgcn
Definition:Triple.h:74
llvm::Triple::getOS
OSType getOS() const
Get the parsed operating system type of this triple.
Definition:Triple.h:404
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition:Triple.h:395
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition:Twine.h:81
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition:raw_ostream.h:52
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition:raw_ostream.h:661
uint16_t
uint32_t
uint64_t
uint8_t
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
OpName
Definition:R600Defines.h:62
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition:AMDGPUAddrSpace.h:38
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition:AMDGPUAddrSpace.h:35
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition:AMDGPUAddrSpace.h:34
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition:AMDGPUAddrSpace.h:31
llvm::AMDGPU::DepCtr::decodeDepCtr
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1698
llvm::AMDGPU::DepCtr::encodeFieldVaVdst
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
Definition:AMDGPUBaseInfo.cpp:1730
llvm::AMDGPU::DepCtr::decodeFieldSaSdst
unsigned decodeFieldSaSdst(unsigned Encoded)
Definition:AMDGPUBaseInfo.cpp:1718
llvm::AMDGPU::DepCtr::encodeFieldVmVsrc
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
Definition:AMDGPUBaseInfo.cpp:1722
llvm::AMDGPU::DepCtr::DEP_CTR_SIZE
const int DEP_CTR_SIZE
Definition:AMDGPUAsmUtils.cpp:83
llvm::AMDGPU::DepCtr::encodeDepCtr
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1704
llvm::AMDGPU::DepCtr::encodeFieldSaSdst
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
Definition:AMDGPUBaseInfo.cpp:1738
llvm::AMDGPU::DepCtr::DepCtrInfo
const CustomOperandVal DepCtrInfo[]
Definition:AMDGPUAsmUtils.cpp:71
llvm::AMDGPU::DepCtr::isSymbolicDepCtrEncoding
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1692
llvm::AMDGPU::DepCtr::decodeFieldVaVdst
unsigned decodeFieldVaVdst(unsigned Encoded)
Definition:AMDGPUBaseInfo.cpp:1714
llvm::AMDGPU::DepCtr::getDefaultDepCtrEncoding
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1685
llvm::AMDGPU::DepCtr::decodeFieldVmVsrc
unsigned decodeFieldVmVsrc(unsigned Encoded)
Definition:AMDGPUBaseInfo.cpp:1710
llvm::AMDGPU::Exp::isSupportedTgtId
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1804
llvm::AMDGPU::Exp::ExpTgtInfo
static constexpr ExpTgt ExpTgtInfo[]
Definition:AMDGPUBaseInfo.cpp:1760
llvm::AMDGPU::Exp::getTgtName
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
Definition:AMDGPUBaseInfo.cpp:1770
llvm::AMDGPU::Exp::getTgtId
unsigned getTgtId(const StringRef Name)
Definition:AMDGPUBaseInfo.cpp:1781
llvm::AMDGPU::Exp::ET_DUAL_SRC_BLEND_MAX_IDX
@ ET_DUAL_SRC_BLEND_MAX_IDX
Definition:SIDefines.h:1016
llvm::AMDGPU::Exp::ET_PARAM_MAX_IDX
@ ET_PARAM_MAX_IDX
Definition:SIDefines.h:1017
llvm::AMDGPU::Exp::ET_NULL
@ ET_NULL
Definition:SIDefines.h:1000
llvm::AMDGPU::Exp::ET_PRIM_MAX_IDX
@ ET_PRIM_MAX_IDX
Definition:SIDefines.h:1013
llvm::AMDGPU::Exp::ET_MRT0
@ ET_MRT0
Definition:SIDefines.h:997
llvm::AMDGPU::Exp::ET_DUAL_SRC_BLEND1
@ ET_DUAL_SRC_BLEND1
Definition:SIDefines.h:1007
llvm::AMDGPU::Exp::ET_INVALID
@ ET_INVALID
Definition:SIDefines.h:1019
llvm::AMDGPU::Exp::ET_PRIM
@ ET_PRIM
Definition:SIDefines.h:1005
llvm::AMDGPU::Exp::ET_PARAM31
@ ET_PARAM31
Definition:SIDefines.h:1009
llvm::AMDGPU::Exp::ET_POS4
@ ET_POS4
Definition:SIDefines.h:1003
llvm::AMDGPU::Exp::ET_MRT_MAX_IDX
@ ET_MRT_MAX_IDX
Definition:SIDefines.h:1014
llvm::AMDGPU::Exp::ET_POS0
@ ET_POS0
Definition:SIDefines.h:1001
llvm::AMDGPU::Exp::ET_DUAL_SRC_BLEND0
@ ET_DUAL_SRC_BLEND0
Definition:SIDefines.h:1006
llvm::AMDGPU::Exp::ET_MRTZ_MAX_IDX
@ ET_MRTZ_MAX_IDX
Definition:SIDefines.h:1012
llvm::AMDGPU::Exp::ET_MRTZ
@ ET_MRTZ
Definition:SIDefines.h:999
llvm::AMDGPU::Exp::ET_NULL_MAX_IDX
@ ET_NULL_MAX_IDX
Definition:SIDefines.h:1011
llvm::AMDGPU::Exp::ET_POS_MAX_IDX
@ ET_POS_MAX_IDX
Definition:SIDefines.h:1015
llvm::AMDGPU::Exp::ET_PARAM0
@ ET_PARAM0
Definition:SIDefines.h:1008
llvm::AMDGPU::HSAMD::V3::VersionMajor
constexpr uint32_t VersionMajor
HSA metadata major version.
Definition:AMDGPUMetadata.h:460
llvm::AMDGPU::HWEncoding::IS_HI16
@ IS_HI16
Definition:SIDefines.h:374
llvm::AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET
@ COMPLETION_ACTION_OFFSET
Definition:SIDefines.h:1042
llvm::AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET
@ HOSTCALL_PTR_OFFSET
Definition:SIDefines.h:1037
llvm::AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET
@ DEFAULT_QUEUE_OFFSET
Definition:SIDefines.h:1041
llvm::AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET
@ MULTIGRID_SYNC_ARG_OFFSET
Definition:SIDefines.h:1038
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
Definition:AMDGPUBaseInfo.cpp:1157
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1169
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition:AMDGPUBaseInfo.cpp:1011
llvm::AMDGPU::IsaInfo::getWavefrontSize
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:937
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition:AMDGPUBaseInfo.cpp:979
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1021
llvm::AMDGPU::IsaInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1002
llvm::AMDGPU::IsaInfo::getWavesPerWorkGroup
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition:AMDGPUBaseInfo.cpp:1026
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition:AMDGPUBaseInfo.cpp:1097
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1040
llvm::AMDGPU::IsaInfo::getLocalMemorySize
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:946
llvm::AMDGPU::IsaInfo::getAddressableLocalMemorySize
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:958
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition:AMDGPUBaseInfo.cpp:1231
llvm::AMDGPU::IsaInfo::getEUsPerCU
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:968
llvm::AMDGPU::IsaInfo::TRAP_NUM_SGPRS
@ TRAP_NUM_SGPRS
Definition:AMDGPUBaseInfo.h:130
llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Definition:AMDGPUBaseInfo.h:129
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1051
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1182
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition:AMDGPUBaseInfo.cpp:1063
llvm::AMDGPU::IsaInfo::getTargetIDSettingFromFeatureString
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
Definition:AMDGPUBaseInfo.cpp:874
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1017
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition:AMDGPUBaseInfo.cpp:1080
llvm::AMDGPU::IsaInfo::getNumSGPRBlocks
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
Definition:AMDGPUBaseInfo.cpp:1133
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:998
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1031
llvm::AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
Definition:AMDGPUBaseInfo.cpp:1188
llvm::AMDGPU::IsaInfo::TargetIDSetting
TargetIDSetting
Definition:AMDGPUBaseInfo.h:133
llvm::AMDGPU::IsaInfo::TargetIDSetting::On
@ On
llvm::AMDGPU::IsaInfo::TargetIDSetting::Unsupported
@ Unsupported
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::AMDGPU::IsaInfo::TargetIDSetting::Any
@ Any
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition:AMDGPUBaseInfo.cpp:1255
llvm::AMDGPU::IsaInfo::getEncodedNumVGPRBlocks
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
Definition:AMDGPUBaseInfo.cpp:1264
llvm::AMDGPU::IsaInfo::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
Definition:AMDGPUBaseInfo.cpp:1204
llvm::AMDGPU::IsaInfo::getGranulatedNumRegisterBlocks
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
Definition:AMDGPUBaseInfo.cpp:1128
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
Definition:AMDGPUBaseInfo.cpp:1139
llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1180
llvm::AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
Definition:AMDGPUBaseInfo.cpp:1271
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1044
llvm::AMDGPU::MFMAScaleFormats::FP8_E5M2
@ FP8_E5M2
Definition:SIDefines.h:1056
llvm::AMDGPU::MFMAScaleFormats::FP4_E2M1
@ FP4_E2M1
Definition:SIDefines.h:1059
llvm::AMDGPU::MFMAScaleFormats::FP8_E4M3
@ FP8_E4M3
Definition:SIDefines.h:1055
llvm::AMDGPU::MFMAScaleFormats::FP6_E3M2
@ FP6_E3M2
Definition:SIDefines.h:1058
llvm::AMDGPU::MFMAScaleFormats::FP6_E2M3
@ FP6_E2M3
Definition:SIDefines.h:1057
llvm::AMDGPU::MTBUFFormat::UfmtSymbolicGFX11
StringLiteral const UfmtSymbolicGFX11[]
Definition:AMDGPUAsmUtils.cpp:484
llvm::AMDGPU::MTBUFFormat::DFMT_MIN
@ DFMT_MIN
Definition:SIDefines.h:601
llvm::AMDGPU::MTBUFFormat::DFMT_UNDEF
@ DFMT_UNDEF
Definition:SIDefines.h:604
llvm::AMDGPU::MTBUFFormat::DFMT_MASK
@ DFMT_MASK
Definition:SIDefines.h:608
llvm::AMDGPU::MTBUFFormat::DFMT_MAX
@ DFMT_MAX
Definition:SIDefines.h:602
llvm::AMDGPU::MTBUFFormat::DFMT_SHIFT
@ DFMT_SHIFT
Definition:SIDefines.h:607
llvm::AMDGPU::MTBUFFormat::isValidUnifiedFormat
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1905
llvm::AMDGPU::MTBUFFormat::getDefaultFormatEncoding
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1930
llvm::AMDGPU::MTBUFFormat::getUnifiedFormatName
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1899
llvm::AMDGPU::MTBUFFormat::DfmtNfmt2UFmtGFX10
unsigned const DfmtNfmt2UFmtGFX10[]
Definition:AMDGPUAsmUtils.cpp:390
llvm::AMDGPU::MTBUFFormat::DfmtSymbolic
StringLiteral const DfmtSymbolic[]
Definition:AMDGPUAsmUtils.cpp:244
llvm::AMDGPU::MTBUFFormat::getNfmtLookupTable
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1842
llvm::AMDGPU::MTBUFFormat::NFMT_UNDEF
@ NFMT_UNDEF
Definition:SIDefines.h:625
llvm::AMDGPU::MTBUFFormat::NFMT_SHIFT
@ NFMT_SHIFT
Definition:SIDefines.h:628
llvm::AMDGPU::MTBUFFormat::NFMT_MASK
@ NFMT_MASK
Definition:SIDefines.h:629
llvm::AMDGPU::MTBUFFormat::NFMT_MAX
@ NFMT_MAX
Definition:SIDefines.h:623
llvm::AMDGPU::MTBUFFormat::NFMT_MIN
@ NFMT_MIN
Definition:SIDefines.h:622
llvm::AMDGPU::MTBUFFormat::isValidNfmt
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1871
llvm::AMDGPU::MTBUFFormat::NfmtSymbolicGFX10
StringLiteral const NfmtSymbolicGFX10[]
Definition:AMDGPUAsmUtils.cpp:263
llvm::AMDGPU::MTBUFFormat::isValidDfmtNfmt
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1864
llvm::AMDGPU::MTBUFFormat::convertDfmtNfmt2Ufmt
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1909
llvm::AMDGPU::MTBUFFormat::getDfmtName
StringRef getDfmtName(unsigned Id)
Definition:AMDGPUBaseInfo.cpp:1837
llvm::AMDGPU::MTBUFFormat::encodeDfmtNfmt
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
Definition:AMDGPUBaseInfo.cpp:1875
llvm::AMDGPU::MTBUFFormat::getUnifiedFormat
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1884
llvm::AMDGPU::MTBUFFormat::isValidFormatEncoding
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1926
llvm::AMDGPU::MTBUFFormat::getNfmtName
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1859
llvm::AMDGPU::MTBUFFormat::DFMT_NFMT_DEFAULT
@ DFMT_NFMT_DEFAULT
Definition:SIDefines.h:634
llvm::AMDGPU::MTBUFFormat::DFMT_NFMT_MAX
@ DFMT_NFMT_MAX
Definition:SIDefines.h:640
llvm::AMDGPU::MTBUFFormat::DfmtNfmt2UFmtGFX11
unsigned const DfmtNfmt2UFmtGFX11[]
Definition:AMDGPUAsmUtils.cpp:564
llvm::AMDGPU::MTBUFFormat::NfmtSymbolicVI
StringLiteral const NfmtSymbolicVI[]
Definition:AMDGPUAsmUtils.cpp:285
llvm::AMDGPU::MTBUFFormat::NfmtSymbolicSICI
StringLiteral const NfmtSymbolicSICI[]
Definition:AMDGPUAsmUtils.cpp:274
llvm::AMDGPU::MTBUFFormat::getNfmt
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1850
llvm::AMDGPU::MTBUFFormat::getDfmt
int64_t getDfmt(const StringRef Name)
Definition:AMDGPUBaseInfo.cpp:1829
llvm::AMDGPU::MTBUFFormat::UFMT_UNDEF
@ UFMT_UNDEF
Definition:SIDefines.h:645
llvm::AMDGPU::MTBUFFormat::UFMT_DEFAULT
@ UFMT_DEFAULT
Definition:SIDefines.h:646
llvm::AMDGPU::MTBUFFormat::UFMT_MAX
@ UFMT_MAX
Definition:SIDefines.h:644
llvm::AMDGPU::MTBUFFormat::UfmtSymbolicGFX10
StringLiteral const UfmtSymbolicGFX10[]
Definition:AMDGPUAsmUtils.cpp:296
llvm::AMDGPU::MTBUFFormat::decodeDfmtNfmt
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
Definition:AMDGPUBaseInfo.cpp:1879
llvm::AMDGPU::SendMsg::encodeMsg
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
Definition:AMDGPUBaseInfo.cpp:2014
llvm::AMDGPU::SendMsg::StreamId
StreamId
Definition:SIDefines.h:491
llvm::AMDGPU::SendMsg::STREAM_ID_LAST_
@ STREAM_ID_LAST_
Definition:SIDefines.h:494
llvm::AMDGPU::SendMsg::STREAM_ID_SHIFT_
@ STREAM_ID_SHIFT_
Definition:SIDefines.h:496
llvm::AMDGPU::SendMsg::STREAM_ID_FIRST_
@ STREAM_ID_FIRST_
Definition:SIDefines.h:495
llvm::AMDGPU::SendMsg::STREAM_ID_MASK_
@ STREAM_ID_MASK_
Definition:SIDefines.h:498
llvm::AMDGPU::SendMsg::STREAM_ID_NONE_
@ STREAM_ID_NONE_
Definition:SIDefines.h:492
llvm::AMDGPU::SendMsg::ID_MASK_PreGFX11_
@ ID_MASK_PreGFX11_
Definition:SIDefines.h:467
llvm::AMDGPU::SendMsg::ID_SYSMSG
@ ID_SYSMSG
Definition:SIDefines.h:456
llvm::AMDGPU::SendMsg::ID_GS_PreGFX11
@ ID_GS_PreGFX11
Definition:SIDefines.h:442
llvm::AMDGPU::SendMsg::ID_GS_DONE_PreGFX11
@ ID_GS_DONE_PreGFX11
Definition:SIDefines.h:443
llvm::AMDGPU::SendMsg::ID_MASK_GFX11Plus_
@ ID_MASK_GFX11Plus_
Definition:SIDefines.h:468
llvm::AMDGPU::SendMsg::msgSupportsStream
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1995
llvm::AMDGPU::SendMsg::decodeMsg
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2002
llvm::AMDGPU::SendMsg::isValidMsgId
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1948
llvm::AMDGPU::SendMsg::OP_GS_NOP
@ OP_GS_NOP
Definition:SIDefines.h:478
llvm::AMDGPU::SendMsg::OP_NONE_
@ OP_NONE_
Definition:SIDefines.h:473
llvm::AMDGPU::SendMsg::OP_SHIFT_
@ OP_SHIFT_
Definition:SIDefines.h:472
llvm::AMDGPU::SendMsg::OP_MASK_
@ OP_MASK_
Definition:SIDefines.h:476
llvm::AMDGPU::SendMsg::isValidMsgStream
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
Definition:AMDGPUBaseInfo.cpp:1969
llvm::AMDGPU::SendMsg::getMsgOpName
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
Definition:AMDGPUAsmUtils.cpp:153
llvm::AMDGPU::SendMsg::getMsgIdMask
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1944
llvm::AMDGPU::SendMsg::msgRequiresOp
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1989
llvm::AMDGPU::SendMsg::isValidMsgOp
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
Definition:AMDGPUBaseInfo.cpp:1952
llvm::AMDGPU::UfmtGFX10::UFMT_LAST
@ UFMT_LAST
Definition:SIDefines.h:746
llvm::AMDGPU::UfmtGFX10::UFMT_FIRST
@ UFMT_FIRST
Definition:SIDefines.h:745
llvm::AMDGPU::UfmtGFX11::UFMT_FIRST
@ UFMT_FIRST
Definition:SIDefines.h:831
llvm::AMDGPU::UfmtGFX11::UFMT_LAST
@ UFMT_LAST
Definition:SIDefines.h:832
llvm::AMDGPU::VOPD::VOPD_VGPR_BANK_MASKS
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
Definition:AMDGPUBaseInfo.h:656
llvm::AMDGPU::VOPD::COMPONENTS_NUM
constexpr unsigned COMPONENTS_NUM
Definition:AMDGPUBaseInfo.h:660
llvm::AMDGPU::VOPD::X
@ X
Definition:AMDGPUBaseInfo.h:658
llvm::AMDGPU::VOPD::Y
@ Y
Definition:AMDGPUBaseInfo.h:658
llvm::AMDGPU::VOPD::COMPONENT_X
@ COMPONENT_X
Definition:AMDGPUBaseInfo.h:710
llvm::AMDGPU::VOPD::MAX_SRC_NUM
@ MAX_SRC_NUM
Definition:AMDGPUBaseInfo.h:650
llvm::AMDGPU::VOPD::MAX_OPR_NUM
@ MAX_OPR_NUM
Definition:AMDGPUBaseInfo.h:651
llvm::AMDGPU::VOPD::DST
@ DST
Definition:AMDGPUBaseInfo.h:644
llvm::AMDGPU::VOPD::SRC2
@ SRC2
Definition:AMDGPUBaseInfo.h:647
llvm::AMDGPU::VOPD::DST_NUM
@ DST_NUM
Definition:AMDGPUBaseInfo.h:649
llvm::AMDGPU::VOPD::SRC1
@ SRC1
Definition:AMDGPUBaseInfo.h:646
llvm::AMDGPU::VOPD::SRC0
@ SRC0
Definition:AMDGPUBaseInfo.h:645
llvm::AMDGPU::isGCN3Encoding
bool isGCN3Encoding(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2226
llvm::AMDGPU::isInlinableLiteralBF16
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
Definition:AMDGPUBaseInfo.cpp:2642
llvm::AMDGPU::isGFX10_BEncoding
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2234
llvm::AMDGPU::isGFX10_GFX11
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2190
llvm::AMDGPU::isInlinableLiteralV216
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
Definition:AMDGPUBaseInfo.cpp:2774
llvm::AMDGPU::getMIMGInfo
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
llvm::AMDGPU::getRegOperandSize
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
Definition:AMDGPUBaseInfo.cpp:2592
llvm::AMDGPU::decodeWaitcnt
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
Definition:AMDGPUBaseInfo.cpp:1472
llvm::AMDGPU::isInlinableLiteralFP16
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
Definition:AMDGPUBaseInfo.cpp:2663
llvm::AMDGPU::isSGPR
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
Definition:AMDGPUBaseInfo.cpp:2281
llvm::AMDGPU::convertSMRDOffsetUnits
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
Definition:AMDGPUBaseInfo.cpp:2898
llvm::AMDGPU::encodeStorecnt
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
Definition:AMDGPUBaseInfo.cpp:1561
llvm::AMDGPU::getMCReg
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
Definition:AMDGPUBaseInfo.cpp:2349
llvm::AMDGPU::hasSMEMByteOffset
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
Definition:AMDGPUBaseInfo.cpp:2870
llvm::AMDGPU::isVOPCAsmOnly
bool isVOPCAsmOnly(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:526
llvm::AMDGPU::getMIMGOpcode
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
Definition:AMDGPUBaseInfo.cpp:273
llvm::AMDGPU::getMTBUFHasSrsrc
bool getMTBUFHasSrsrc(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:452
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition:AMDGPUBaseInfo.cpp:2936
llvm::AMDGPU::isSymbolicCustomOperandEncoding
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1614
llvm::AMDGPU::isGFX10Before1030
bool isGFX10Before1030(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2222
llvm::AMDGPU::isSISrcInlinableOperand
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
Definition:AMDGPUBaseInfo.cpp:2436
llvm::AMDGPU::mapWMMA2AddrTo3AddrOpcode
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:671
llvm::AMDGPU::OPR_ID_UNSUPPORTED
const int OPR_ID_UNSUPPORTED
Definition:AMDGPUAsmUtils.h:24
llvm::AMDGPU::shouldEmitConstantsToTextSection
bool shouldEmitConstantsToTextSection(const Triple &TT)
Definition:AMDGPUBaseInfo.cpp:1327
llvm::AMDGPU::isInlinableLiteralV2I16
bool isInlinableLiteralV2I16(uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2794
llvm::AMDGPU::getMTBUFElements
int getMTBUFElements(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:442
llvm::AMDGPU::isHi16Reg
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
Definition:AMDGPUBaseInfo.cpp:2288
llvm::AMDGPU::encodeCustomOperandVal
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
Definition:AMDGPUBaseInfo.cpp:1650
llvm::AMDGPU::getTotalNumVGPRs
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
Definition:AMDGPUBaseInfo.cpp:2274
llvm::AMDGPU::isGFX10
bool isGFX10(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2186
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::AMDGPU::isInlinableLiteralV2BF16
bool isInlinableLiteralV2BF16(uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2799
llvm::AMDGPU::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2146
llvm::AMDGPU::getInlineEncodingV216
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2680
llvm::AMDGPU::getFPDstSelType
FPType getFPDstSelType(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:659
llvm::AMDGPU::getNumFlatOffsetBits
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
Definition:AMDGPUBaseInfo.cpp:2946
llvm::AMDGPU::AMDHSA_COV4
@ AMDHSA_COV4
Definition:AMDGPUBaseInfo.h:56
llvm::AMDGPU::AMDHSA_COV5
@ AMDHSA_COV5
Definition:AMDGPUBaseInfo.h:56
llvm::AMDGPU::AMDHSA_COV6
@ AMDHSA_COV6
Definition:AMDGPUBaseInfo.h:56
llvm::AMDGPU::hasA16
bool hasA16(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2118
llvm::AMDGPU::isLegalSMRDEncodedSignedOffset
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
Definition:AMDGPUBaseInfo.cpp:2883
llvm::AMDGPU::isGFX12Plus
bool isGFX12Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2210
llvm::AMDGPU::getNSAMaxSize
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
Definition:AMDGPUBaseInfo.cpp:2135
llvm::AMDGPU::getCanBeVOPD
CanBeVOPD getCanBeVOPD(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:570
llvm::AMDGPU::hasPackedD16
bool hasPackedD16(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2126
llvm::AMDGPU::getStorecntBitMask
unsigned getStorecntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1438
llvm::AMDGPU::getLdsDwGranularity
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
Definition:AMDGPUBaseInfo.cpp:3024
llvm::AMDGPU::isGFX940
bool isGFX940(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2250
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.cpp:2066
llvm::AMDGPU::isInlinableLiteralV2F16
bool isInlinableLiteralV2F16(uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2804
llvm::AMDGPU::isHsaAbi
bool isHsaAbi(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:168
llvm::AMDGPU::isGFX11
bool isGFX11(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2198
llvm::AMDGPU::OPR_VAL_INVALID
const int OPR_VAL_INVALID
Definition:AMDGPUAsmUtils.h:26
llvm::AMDGPU::getSMEMIsBuffer
bool getSMEMIsBuffer(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:502
llvm::AMDGPU::isGFX10_3_GFX11
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2242
llvm::AMDGPU::mfmaScaleF8F6F4FormatToNumRegs
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
Definition:AMDGPUBaseInfo.cpp:538
llvm::AMDGPU::isGroupSegment
bool isGroupSegment(const GlobalValue *GV)
Definition:AMDGPUBaseInfo.cpp:1313
llvm::AMDGPU::getIsaVersion
IsaVersion getIsaVersion(StringRef GPU)
Definition:TargetParser.cpp:229
llvm::AMDGPU::getMTBUFHasSoffset
bool getMTBUFHasSoffset(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:457
llvm::AMDGPU::hasXNACK
bool hasXNACK(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2106
llvm::AMDGPU::isValid32BitLiteral
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
Definition:AMDGPUBaseInfo.cpp:2808
llvm::AMDGPU::getCombinedCountBitMask
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
Definition:AMDGPUBaseInfo.cpp:1521
llvm::AMDGPU::getVOPDOpcode
unsigned getVOPDOpcode(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:577
llvm::AMDGPU::isDPALU_DPP
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
Definition:AMDGPUBaseInfo.cpp:3020
llvm::AMDGPU::encodeWaitcnt
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
Definition:AMDGPUBaseInfo.cpp:1508
llvm::AMDGPU::isVOPC64DPP
bool isVOPC64DPP(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:522
llvm::AMDGPU::getMUBUFOpcode
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
Definition:AMDGPUBaseInfo.cpp:467
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition:AMDGPUBaseInfo.cpp:2062
llvm::AMDGPU::getMAIIsGFX940XDL
bool getMAIIsGFX940XDL(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:533
llvm::AMDGPU::isSI
bool isSI(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2148
llvm::AMDGPU::getDefaultAMDHSACodeObjectVersion
unsigned getDefaultAMDHSACodeObjectVersion()
Definition:AMDGPUBaseInfo.cpp:181
llvm::AMDGPU::isReadOnlySegment
bool isReadOnlySegment(const GlobalValue *GV)
Definition:AMDGPUBaseInfo.cpp:1321
llvm::AMDGPU::isArgPassedInSGPR
bool isArgPassedInSGPR(const Argument *A)
Definition:AMDGPUBaseInfo.cpp:2815
llvm::AMDGPU::isIntrinsicAlwaysUniform
bool isIntrinsicAlwaysUniform(unsigned IntrID)
Definition:AMDGPUBaseInfo.cpp:2981
llvm::AMDGPU::getMUBUFBaseOpcode
int getMUBUFBaseOpcode(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:462
llvm::AMDGPU::getAMDHSACodeObjectVersion
unsigned getAMDHSACodeObjectVersion(const Module &M)
Definition:AMDGPUBaseInfo.cpp:172
llvm::AMDGPU::decodeLgkmcnt
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
Definition:AMDGPUBaseInfo.cpp:1467
llvm::AMDGPU::getWaitcntBitMask
unsigned getWaitcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1442
llvm::AMDGPU::getVOP3IsSingle
bool getVOP3IsSingle(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:517
llvm::AMDGPU::isGFX9
bool isGFX9(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2160
llvm::AMDGPU::getVOP1IsSingle
bool getVOP1IsSingle(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:507
llvm::AMDGPU::isDwordAligned
static bool isDwordAligned(uint64_t ByteOffset)
Definition:AMDGPUBaseInfo.cpp:2894
llvm::AMDGPU::getVOPDEncodingFamily
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
Definition:AMDGPUBaseInfo.cpp:562
llvm::AMDGPU::isGFX10_AEncoding
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2230
llvm::AMDGPU::isKImmOperand
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
Definition:AMDGPUBaseInfo.cpp:2403
llvm::AMDGPU::getHasColorExport
bool getHasColorExport(const Function &F)
Definition:AMDGPUBaseInfo.cpp:2030
llvm::AMDGPU::getMTBUFBaseOpcode
int getMTBUFBaseOpcode(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:432
llvm::AMDGPU::isChainCC
bool isChainCC(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.cpp:2092
llvm::AMDGPU::isGFX90A
bool isGFX90A(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2246
llvm::AMDGPU::getSamplecntBitMask
unsigned getSamplecntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1414
llvm::AMDGPU::getDefaultQueueImplicitArgPosition
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
Definition:AMDGPUBaseInfo.cpp:240
llvm::AMDGPU::hasSRAMECC
bool hasSRAMECC(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2110
llvm::AMDGPU::getHasDepthExport
bool getHasDepthExport(const Function &F)
Definition:AMDGPUBaseInfo.cpp:2037
llvm::AMDGPU::isGFX8_GFX9_GFX10
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2172
llvm::AMDGPU::getMUBUFHasVAddr
bool getMUBUFHasVAddr(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:477
llvm::AMDGPU::getVOPDFull
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)
Definition:AMDGPUBaseInfo.cpp:688
llvm::AMDGPU::isTrue16Inst
bool isTrue16Inst(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:654
llvm::AMDGPU::hasAny64BitVGPROperands
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)
Definition:AMDGPUBaseInfo.cpp:3005
llvm::AMDGPU::getVOPDComponents
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
Definition:AMDGPUBaseInfo.cpp:694
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition:AMDGPUBaseInfo.cpp:2616
llvm::AMDGPU::isGFX12
bool isGFX12(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2206
llvm::AMDGPU::getInitialPSInputAddr
unsigned getInitialPSInputAddr(const Function &F)
Definition:AMDGPUBaseInfo.cpp:2026
llvm::AMDGPU::encodeExpcnt
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
Definition:AMDGPUBaseInfo.cpp:1496
llvm::AMDGPU::isSISrcOperand
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
Definition:AMDGPUBaseInfo.cpp:2396
llvm::AMDGPU::getKmcntBitMask
unsigned getKmcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1434
llvm::AMDGPU::getVmcntBitMask
unsigned getVmcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1404
llvm::AMDGPU::FPType
FPType
Definition:AMDGPUBaseInfo.h:58
llvm::AMDGPU::FPType::FP4
@ FP4
llvm::AMDGPU::FPType::None
@ None
llvm::AMDGPU::FPType::FP8
@ FP8
llvm::AMDGPU::isNotGFX10Plus
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2218
llvm::AMDGPU::hasMAIInsts
bool hasMAIInsts(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2258
llvm::AMDGPU::isIntrinsicSourceOfDivergence
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
Definition:AMDGPUBaseInfo.cpp:2977
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition:AMDGPUBaseInfo.cpp:2102
llvm::AMDGPU::isGenericAtomic
bool isGenericAtomic(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:634
llvm::AMDGPU::decodeStorecntDscnt
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
Definition:AMDGPUBaseInfo.cpp:1545
llvm::AMDGPU::isGFX8Plus
bool isGFX8Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2176
llvm::AMDGPU::hasNamedOperand
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
Definition:AMDGPUBaseInfo.h:400
llvm::AMDGPU::isInlinableIntLiteral
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
Definition:AMDGPUBaseInfo.h:1458
llvm::AMDGPU::getLgkmcntBitMask
unsigned getLgkmcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1426
llvm::AMDGPU::getMUBUFTfe
bool getMUBUFTfe(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:497
llvm::AMDGPU::getBvhcntBitMask
unsigned getBvhcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1418
llvm::AMDGPU::hasSMRDSignedImmOffset
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
Definition:AMDGPUBaseInfo.cpp:163
llvm::AMDGPU::hasMIMG_R128
bool hasMIMG_R128(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2114
llvm::AMDGPU::hasGFX10_3Insts
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2238
llvm::AMDGPU::hasG16
bool hasG16(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2122
llvm::AMDGPU::getAddrSizeMIMGOp
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
Definition:AMDGPUBaseInfo.cpp:293
llvm::AMDGPU::getMTBUFOpcode
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
Definition:AMDGPUBaseInfo.cpp:437
llvm::AMDGPU::getExpcntBitMask
unsigned getExpcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1422
llvm::AMDGPU::hasArchitectedFlatScratch
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2254
llvm::AMDGPU::getMUBUFHasSoffset
bool getMUBUFHasSoffset(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:487
llvm::AMDGPU::isNotGFX11Plus
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2214
llvm::AMDGPU::isGFX11Plus
bool isGFX11Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2202
llvm::AMDGPU::getInlineEncodingV2F16
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2769
llvm::AMDGPU::isInlineValue
bool isInlineValue(unsigned Reg)
Definition:AMDGPUBaseInfo.cpp:2367
llvm::AMDGPU::isSISrcFPOperand
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
Definition:AMDGPUBaseInfo.cpp:2410
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition:AMDGPUBaseInfo.cpp:2041
llvm::AMDGPU::getHostcallImplicitArgPosition
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
Definition:AMDGPUBaseInfo.cpp:229
llvm::AMDGPU::getDefaultCustomOperandEncoding
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1602
llvm::AMDGPU::encodeLoadcnt
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
Definition:AMDGPUBaseInfo.cpp:1555
llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2194
llvm::AMDGPU::decodeCustomOperand
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1633
llvm::AMDGPU::getSMRDEncodedOffset
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
Definition:AMDGPUBaseInfo.cpp:2907
llvm::AMDGPU::isGlobalSegment
bool isGlobalSegment(const GlobalValue *GV)
Definition:AMDGPUBaseInfo.cpp:1317
llvm::AMDGPU::OPERAND_KIMM_LAST
@ OPERAND_KIMM_LAST
Definition:SIDefines.h:269
llvm::AMDGPU::OPERAND_KIMM32
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition:SIDefines.h:234
llvm::AMDGPU::OPERAND_REG_INLINE_C_LAST
@ OPERAND_REG_INLINE_C_LAST
Definition:SIDefines.h:260
llvm::AMDGPU::OPERAND_REG_IMM_V2FP16
@ OPERAND_REG_IMM_V2FP16
Definition:SIDefines.h:211
llvm::AMDGPU::OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_FP64
Definition:SIDefines.h:223
llvm::AMDGPU::OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_INLINE_C_V2BF16
Definition:SIDefines.h:225
llvm::AMDGPU::OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_V2INT16
Definition:SIDefines.h:212
llvm::AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_INLINE_AC_V2FP16
Definition:SIDefines.h:246
llvm::AMDGPU::OPERAND_SRC_FIRST
@ OPERAND_SRC_FIRST
Definition:SIDefines.h:265
llvm::AMDGPU::OPERAND_REG_IMM_V2BF16
@ OPERAND_REG_IMM_V2BF16
Definition:SIDefines.h:210
llvm::AMDGPU::OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_FIRST
Definition:SIDefines.h:262
llvm::AMDGPU::OPERAND_KIMM_FIRST
@ OPERAND_KIMM_FIRST
Definition:SIDefines.h:268
llvm::AMDGPU::OPERAND_REG_IMM_FP16
@ OPERAND_REG_IMM_FP16
Definition:SIDefines.h:206
llvm::AMDGPU::OPERAND_REG_IMM_FP64
@ OPERAND_REG_IMM_FP64
Definition:SIDefines.h:204
llvm::AMDGPU::OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
Definition:SIDefines.h:226
llvm::AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_V2INT16
Definition:SIDefines.h:244
llvm::AMDGPU::OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_FP16
Definition:SIDefines.h:241
llvm::AMDGPU::OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_FP32
Definition:SIDefines.h:242
llvm::AMDGPU::OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_INLINE_AC_V2BF16
Definition:SIDefines.h:245
llvm::AMDGPU::OPERAND_REG_IMM_FP32
@ OPERAND_REG_IMM_FP32
Definition:SIDefines.h:203
llvm::AMDGPU::OPERAND_REG_INLINE_C_FIRST
@ OPERAND_REG_INLINE_C_FIRST
Definition:SIDefines.h:259
llvm::AMDGPU::OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_FP32
Definition:SIDefines.h:222
llvm::AMDGPU::OPERAND_REG_INLINE_AC_LAST
@ OPERAND_REG_INLINE_AC_LAST
Definition:SIDefines.h:263
llvm::AMDGPU::OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_C_V2INT16
Definition:SIDefines.h:224
llvm::AMDGPU::OPERAND_REG_IMM_V2FP32
@ OPERAND_REG_IMM_V2FP32
Definition:SIDefines.h:214
llvm::AMDGPU::OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_AC_FP64
Definition:SIDefines.h:243
llvm::AMDGPU::OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_FP16
Definition:SIDefines.h:221
llvm::AMDGPU::OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_REG_INLINE_C_V2FP32
Definition:SIDefines.h:228
llvm::AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition:SIDefines.h:209
llvm::AMDGPU::OPERAND_SRC_LAST
@ OPERAND_SRC_LAST
Definition:SIDefines.h:266
llvm::AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition:SIDefines.h:208
llvm::AMDGPU::initDefaultAMDKernelCodeT
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
Definition:AMDGPUBaseInfo.cpp:1279
llvm::AMDGPU::isNotGFX9Plus
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2184
llvm::AMDGPU::hasGDS
bool hasGDS(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2131
llvm::AMDGPU::isLegalSMRDEncodedUnsignedOffset
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
Definition:AMDGPUBaseInfo.cpp:2874
llvm::AMDGPU::isGFX9Plus
bool isGFX9Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2180
llvm::AMDGPU::hasDPPSrc1SGPR
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2266
llvm::AMDGPU::OPR_ID_DUPLICATE
const int OPR_ID_DUPLICATE
Definition:AMDGPUAsmUtils.h:25
llvm::AMDGPU::isVOPD
bool isVOPD(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:582
llvm::AMDGPU::getVOPDInstInfo
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
Definition:AMDGPUBaseInfo.cpp:790
llvm::AMDGPU::encodeVmcnt
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
Definition:AMDGPUBaseInfo.cpp:1487
llvm::AMDGPU::decodeExpcnt
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
Definition:AMDGPUBaseInfo.cpp:1462
llvm::AMDGPU::isCvt_F32_Fp8_Bf8_e64
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:621
llvm::AMDGPU::decodeLoadcntDscnt
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
Definition:AMDGPUBaseInfo.cpp:1535
llvm::AMDGPU::getInlineEncodingV2I16
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2735
llvm::AMDGPU::getRegBitWidth
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
Definition:SIRegisterInfo.cpp:3201
llvm::AMDGPU::encodeStorecntDscnt
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
Definition:AMDGPUBaseInfo.cpp:1585
llvm::AMDGPU::getMCOpcode
int getMCOpcode(uint16_t Opcode, unsigned Gen)
Definition:AMDGPUBaseInfo.cpp:684
llvm::AMDGPU::getMIMGBaseOpcode
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:280
llvm::AMDGPU::isVI
bool isVI(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2156
llvm::AMDGPU::getMUBUFIsBufferInv
bool getMUBUFIsBufferInv(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:492
llvm::AMDGPU::mc2PseudoReg
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
Definition:AMDGPUBaseInfo.cpp:2365
llvm::AMDGPU::getInlineEncodingV2BF16
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
Definition:AMDGPUBaseInfo.cpp:2741
llvm::AMDGPU::encodeCustomOperand
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:1657
llvm::AMDGPU::hasKernargPreload
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2270
llvm::AMDGPU::isMAC
bool isMAC(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:586
llvm::AMDGPU::isCI
bool isCI(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2152
llvm::AMDGPU::encodeLgkmcnt
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
Definition:AMDGPUBaseInfo.cpp:1502
llvm::AMDGPU::getVOP2IsSingle
bool getVOP2IsSingle(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:512
llvm::AMDGPU::getMAIIsDGEMM
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
Definition:AMDGPUBaseInfo.cpp:528
llvm::AMDGPU::getMIMGBaseOpcodeInfo
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
llvm::AMDGPU::OPR_ID_UNKNOWN
const int OPR_ID_UNKNOWN
Definition:AMDGPUAsmUtils.h:23
llvm::AMDGPU::getCompletionActionImplicitArgPosition
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
Definition:AMDGPUBaseInfo.cpp:251
llvm::AMDGPU::getIntegerVecAttribute
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
Definition:AMDGPUBaseInfo.cpp:1367
llvm::AMDGPU::getMaskedMIMGOp
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
Definition:AMDGPUBaseInfo.cpp:285
llvm::AMDGPU::isModuleEntryFunctionCC
bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.cpp:2083
llvm::AMDGPU::isNotGFX12Plus
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2212
llvm::AMDGPU::getMTBUFHasVAddr
bool getMTBUFHasVAddr(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:447
llvm::AMDGPU::decodeVmcnt
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
Definition:AMDGPUBaseInfo.cpp:1454
llvm::AMDGPU::getELFABIVersion
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
Definition:AMDGPUBaseInfo.cpp:198
llvm::AMDGPU::getIntegerPairAttribute
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
Definition:AMDGPUBaseInfo.cpp:1332
llvm::AMDGPU::getLoadcntBitMask
unsigned getLoadcntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1410
llvm::AMDGPU::isInlinableLiteralI16
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
Definition:AMDGPUBaseInfo.cpp:2659
llvm::AMDGPU::hasVOPD
bool hasVOPD(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2262
llvm::AMDGPU::encodeDscnt
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
Definition:AMDGPUBaseInfo.cpp:1567
llvm::AMDGPU::isInlinableLiteral64
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
Definition:AMDGPUBaseInfo.cpp:2599
llvm::AMDGPU::getMFMA_F8F6F4_WithFormatArgs
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
Definition:AMDGPUBaseInfo.cpp:554
llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
Definition:AMDGPUBaseInfo.cpp:215
llvm::AMDGPU::isGFX9_GFX10_GFX11
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2168
llvm::AMDGPU::isGFX9_GFX10
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2164
llvm::AMDGPU::getMUBUFElements
int getMUBUFElements(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:472
llvm::AMDGPU::encodeLoadcntDscnt
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
Definition:AMDGPUBaseInfo.cpp:1573
llvm::AMDGPU::getGcnBufferFormatInfo
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
Definition:AMDGPUBaseInfo.cpp:2985
llvm::AMDGPU::isGraphics
bool isGraphics(CallingConv::ID cc)
Definition:AMDGPUBaseInfo.cpp:2058
llvm::AMDGPU::mapWMMA3AddrTo2AddrOpcode
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:676
llvm::AMDGPU::isPermlane16
bool isPermlane16(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:610
llvm::AMDGPU::getMUBUFHasSrsrc
bool getMUBUFHasSrsrc(unsigned Opc)
Definition:AMDGPUBaseInfo.cpp:482
llvm::AMDGPU::getDscntBitMask
unsigned getDscntBitMask(const IsaVersion &Version)
Definition:AMDGPUBaseInfo.cpp:1430
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition:BitmaskEnum.h:125
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition:CallingConv.h:197
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition:CallingConv.h:188
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition:CallingConv.h:200
llvm::CallingConv::AMDGPU_Gfx
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition:CallingConv.h:232
llvm::CallingConv::AMDGPU_CS_ChainPreserve
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition:CallingConv.h:249
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition:CallingConv.h:206
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition:CallingConv.h:191
llvm::CallingConv::AMDGPU_CS_Chain
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition:CallingConv.h:245
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition:CallingConv.h:194
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition:CallingConv.h:144
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition:CallingConv.h:218
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition:CallingConv.h:213
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition:CallingConv.h:24
llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V4
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition:ELF.h:381
llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition:ELF.h:382
llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V6
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition:ELF.h:383
llvm::MCOI::TIED_TO
@ TIED_TO
Definition:MCInstrDesc.h:36
llvm::SIEncodingFamily::GFX11
@ GFX11
Definition:SIDefines.h:46
llvm::SIEncodingFamily::GFX12
@ GFX12
Definition:SIDefines.h:47
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition:MathExtras.h:556
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition:Error.cpp:167
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition:raw_ostream.cpp:907
llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition:MathExtras.h:404
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition:Alignment.h:155
llvm::ReplacementType::Format
@ Format
llvm::ReplacementType::Literal
@ Literal
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition:APFixedPoint.h:303
llvm::InstructionUniformity::AlwaysUniform
@ AlwaysUniform
The result values are always uniform.
llvm::InstructionUniformity::Default
@ Default
The result values are uniform if and only if all operands are uniform.
llvm::Version
@ Version
Definition:PGOCtxProfWriter.h:22
N
#define N
amd_kernel_code_t
AMD Kernel Code Object (amd_kernel_code_t).
Definition:AMDKernelCodeT.h:526
llvm::AMDGPU::AMDGPUMCKernelCodeT
Definition:AMDKernelCodeTUtils.h:33
llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_version_major
uint16_t amd_machine_version_major
Definition:AMDKernelCodeTUtils.h:41
llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_kind
uint16_t amd_machine_kind
Definition:AMDKernelCodeTUtils.h:40
llvm::AMDGPU::AMDGPUMCKernelCodeT::wavefront_size
uint8_t wavefront_size
Definition:AMDKernelCodeTUtils.h:63
llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_version_stepping
uint16_t amd_machine_version_stepping
Definition:AMDKernelCodeTUtils.h:43
llvm::AMDGPU::AMDGPUMCKernelCodeT::private_segment_alignment
uint8_t private_segment_alignment
Definition:AMDKernelCodeTUtils.h:62
llvm::AMDGPU::AMDGPUMCKernelCodeT::kernel_code_entry_byte_offset
int64_t kernel_code_entry_byte_offset
Definition:AMDKernelCodeTUtils.h:44
llvm::AMDGPU::AMDGPUMCKernelCodeT::code_properties
uint32_t code_properties
Definition:AMDKernelCodeTUtils.h:49
llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_kernel_code_version_major
uint32_t amd_kernel_code_version_major
Definition:AMDKernelCodeTUtils.h:38
llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_version_minor
uint16_t amd_machine_version_minor
Definition:AMDKernelCodeTUtils.h:42
llvm::AMDGPU::AMDGPUMCKernelCodeT::group_segment_alignment
uint8_t group_segment_alignment
Definition:AMDKernelCodeTUtils.h:61
llvm::AMDGPU::AMDGPUMCKernelCodeT::kernarg_segment_alignment
uint8_t kernarg_segment_alignment
Definition:AMDKernelCodeTUtils.h:60
llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_kernel_code_version_minor
uint32_t amd_kernel_code_version_minor
Definition:AMDKernelCodeTUtils.h:39
llvm::AMDGPU::AMDGPUMCKernelCodeT::call_convention
int32_t call_convention
Definition:AMDKernelCodeTUtils.h:64
llvm::AMDGPU::AMDGPUMCKernelCodeT::compute_pgm_resource_registers
uint64_t compute_pgm_resource_registers
Definition:AMDKernelCodeTUtils.h:48
llvm::AMDGPU::CanBeVOPD
Definition:AMDGPUBaseInfo.h:588
llvm::AMDGPU::CustomOperandVal
Definition:AMDGPUAsmUtils.h:34
llvm::AMDGPU::DPMACCInstructionInfo
Definition:AMDGPUBaseInfo.cpp:384
llvm::AMDGPU::DPMACCInstructionInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:385
llvm::AMDGPU::DPMACCInstructionInfo::IsDPMACCInstruction
bool IsDPMACCInstruction
Definition:AMDGPUBaseInfo.cpp:386
llvm::AMDGPU::Exp::ExpTgt
Definition:AMDGPUBaseInfo.cpp:1754
llvm::AMDGPU::Exp::ExpTgt::Tgt
unsigned Tgt
Definition:AMDGPUBaseInfo.cpp:1756
llvm::AMDGPU::Exp::ExpTgt::Name
StringLiteral Name
Definition:AMDGPUBaseInfo.cpp:1755
llvm::AMDGPU::Exp::ExpTgt::MaxIndex
unsigned MaxIndex
Definition:AMDGPUBaseInfo.cpp:1757
llvm::AMDGPU::FP4FP8DstByteSelInfo
Definition:AMDGPUBaseInfo.cpp:389
llvm::AMDGPU::FP4FP8DstByteSelInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:390
llvm::AMDGPU::FP4FP8DstByteSelInfo::HasFP8DstByteSel
bool HasFP8DstByteSel
Definition:AMDGPUBaseInfo.cpp:391
llvm::AMDGPU::FP4FP8DstByteSelInfo::HasFP4DstByteSel
bool HasFP4DstByteSel
Definition:AMDGPUBaseInfo.cpp:392
llvm::AMDGPU::GcnBufferFormatInfo
Definition:AMDGPUBaseInfo.h:87
llvm::AMDGPU::IsaVersion
Instruction set architecture version.
Definition:TargetParser.h:130
llvm::AMDGPU::MAIInstInfo
Definition:AMDGPUBaseInfo.h:95
llvm::AMDGPU::MFMA_F8F6F4_Info
Definition:AMDGPUBaseInfo.h:101
llvm::AMDGPU::MIMGBaseOpcodeInfo
Definition:AMDGPUBaseInfo.h:407
llvm::AMDGPU::MIMGBaseOpcodeInfo::Gradients
bool Gradients
Definition:AMDGPUBaseInfo.h:416
llvm::AMDGPU::MIMGBaseOpcodeInfo::G16
bool G16
Definition:AMDGPUBaseInfo.h:417
llvm::AMDGPU::MIMGBaseOpcodeInfo::LodOrClampOrMip
bool LodOrClampOrMip
Definition:AMDGPUBaseInfo.h:419
llvm::AMDGPU::MIMGBaseOpcodeInfo::Coordinates
bool Coordinates
Definition:AMDGPUBaseInfo.h:418
llvm::AMDGPU::MIMGBaseOpcodeInfo::NumExtraArgs
uint8_t NumExtraArgs
Definition:AMDGPUBaseInfo.h:415
llvm::AMDGPU::MIMGDimInfo
Definition:AMDGPUBaseInfo.h:433
llvm::AMDGPU::MIMGDimInfo::NumCoords
uint8_t NumCoords
Definition:AMDGPUBaseInfo.h:435
llvm::AMDGPU::MIMGDimInfo::NumGradients
uint8_t NumGradients
Definition:AMDGPUBaseInfo.h:436
llvm::AMDGPU::MIMGInfo
Definition:AMDGPUBaseInfo.h:509
llvm::AMDGPU::MIMGInfo::BaseOpcode
uint16_t BaseOpcode
Definition:AMDGPUBaseInfo.h:511
llvm::AMDGPU::MIMGInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.h:510
llvm::AMDGPU::MIMGInfo::VAddrDwords
uint8_t VAddrDwords
Definition:AMDGPUBaseInfo.h:514
llvm::AMDGPU::MIMGInfo::MIMGEncoding
uint8_t MIMGEncoding
Definition:AMDGPUBaseInfo.h:512
llvm::AMDGPU::MTBUFInfo
Definition:AMDGPUBaseInfo.cpp:332
llvm::AMDGPU::MTBUFInfo::BaseOpcode
uint16_t BaseOpcode
Definition:AMDGPUBaseInfo.cpp:334
llvm::AMDGPU::MTBUFInfo::has_srsrc
bool has_srsrc
Definition:AMDGPUBaseInfo.cpp:337
llvm::AMDGPU::MTBUFInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:333
llvm::AMDGPU::MTBUFInfo::has_vaddr
bool has_vaddr
Definition:AMDGPUBaseInfo.cpp:336
llvm::AMDGPU::MTBUFInfo::elements
uint8_t elements
Definition:AMDGPUBaseInfo.cpp:335
llvm::AMDGPU::MTBUFInfo::has_soffset
bool has_soffset
Definition:AMDGPUBaseInfo.cpp:338
llvm::AMDGPU::MUBUFInfo
Definition:AMDGPUBaseInfo.cpp:321
llvm::AMDGPU::MUBUFInfo::IsBufferInv
bool IsBufferInv
Definition:AMDGPUBaseInfo.cpp:328
llvm::AMDGPU::MUBUFInfo::has_srsrc
bool has_srsrc
Definition:AMDGPUBaseInfo.cpp:326
llvm::AMDGPU::MUBUFInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:322
llvm::AMDGPU::MUBUFInfo::elements
uint8_t elements
Definition:AMDGPUBaseInfo.cpp:324
llvm::AMDGPU::MUBUFInfo::tfe
bool tfe
Definition:AMDGPUBaseInfo.cpp:329
llvm::AMDGPU::MUBUFInfo::has_soffset
bool has_soffset
Definition:AMDGPUBaseInfo.cpp:327
llvm::AMDGPU::MUBUFInfo::has_vaddr
bool has_vaddr
Definition:AMDGPUBaseInfo.cpp:325
llvm::AMDGPU::MUBUFInfo::BaseOpcode
uint16_t BaseOpcode
Definition:AMDGPUBaseInfo.cpp:323
llvm::AMDGPU::SMInfo
Definition:AMDGPUBaseInfo.cpp:341
llvm::AMDGPU::SMInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:342
llvm::AMDGPU::SMInfo::IsBuffer
bool IsBuffer
Definition:AMDGPUBaseInfo.cpp:343
llvm::AMDGPU::VOP3CDPPAsmOnlyInfo
Definition:AMDGPUBaseInfo.cpp:359
llvm::AMDGPU::VOP3CDPPAsmOnlyInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:360
llvm::AMDGPU::VOPC64DPPInfo
Definition:AMDGPUBaseInfo.cpp:351
llvm::AMDGPU::VOPC64DPPInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:352
llvm::AMDGPU::VOPCDPPAsmOnlyInfo
Definition:AMDGPUBaseInfo.cpp:355
llvm::AMDGPU::VOPCDPPAsmOnlyInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:356
llvm::AMDGPU::VOPDComponentInfo
Definition:AMDGPUBaseInfo.cpp:363
llvm::AMDGPU::VOPDComponentInfo::VOPDOp
uint16_t VOPDOp
Definition:AMDGPUBaseInfo.cpp:365
llvm::AMDGPU::VOPDComponentInfo::BaseVOP
uint16_t BaseVOP
Definition:AMDGPUBaseInfo.cpp:364
llvm::AMDGPU::VOPDComponentInfo::CanBeVOPDX
bool CanBeVOPDX
Definition:AMDGPUBaseInfo.cpp:366
llvm::AMDGPU::VOPDInfo
Definition:AMDGPUBaseInfo.cpp:369
llvm::AMDGPU::VOPDInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:370
llvm::AMDGPU::VOPDInfo::OpX
uint16_t OpX
Definition:AMDGPUBaseInfo.cpp:371
llvm::AMDGPU::VOPDInfo::Subtarget
uint16_t Subtarget
Definition:AMDGPUBaseInfo.cpp:373
llvm::AMDGPU::VOPDInfo::OpY
uint16_t OpY
Definition:AMDGPUBaseInfo.cpp:372
llvm::AMDGPU::VOPInfo
Definition:AMDGPUBaseInfo.cpp:346
llvm::AMDGPU::VOPInfo::IsSingle
bool IsSingle
Definition:AMDGPUBaseInfo.cpp:348
llvm::AMDGPU::VOPInfo::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:347
llvm::AMDGPU::VOPTrue16Info
Definition:AMDGPUBaseInfo.cpp:376
llvm::AMDGPU::VOPTrue16Info::IsTrue16
bool IsTrue16
Definition:AMDGPUBaseInfo.cpp:378
llvm::AMDGPU::VOPTrue16Info::Opcode
uint16_t Opcode
Definition:AMDGPUBaseInfo.cpp:377
llvm::AMDGPU::WMMAOpcodeMappingInfo
Definition:AMDGPUBaseInfo.h:480
llvm::AMDGPU::Waitcnt
Represents the counter values to wait for in an s_waitcnt instruction.
Definition:AMDGPUBaseInfo.h:967
llvm::AMDGPU::Waitcnt::ExpCnt
unsigned ExpCnt
Definition:AMDGPUBaseInfo.h:969
llvm::AMDGPU::Waitcnt::LoadCnt
unsigned LoadCnt
Definition:AMDGPUBaseInfo.h:968
llvm::AMDGPU::Waitcnt::StoreCnt
unsigned StoreCnt
Definition:AMDGPUBaseInfo.h:971
llvm::AMDGPU::Waitcnt::DsCnt
unsigned DsCnt
Definition:AMDGPUBaseInfo.h:970
llvm::DWARFExpression::Operation::Description
Description of the encoding of one expression Op.
Definition:DWARFExpression.h:66
llvm::cl::desc
Definition:CommandLine.h:409

Generated on Sun Jul 20 2025 11:29:39 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp