Movatterモバイル変換

Go to the documentation of this file.

1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

9#include "AMDGPUBaseInfo.h"

10#include "AMDGPU.h"

11#include "AMDGPUAsmUtils.h"

12#include "AMDKernelCodeT.h"

13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

14#include "Utils/AMDKernelCodeTUtils.h"

15#include "llvm/ADT/StringExtras.h"

16#include "llvm/BinaryFormat/ELF.h"

17#include "llvm/IR/Attributes.h"

18#include "llvm/IR/Constants.h"

19#include "llvm/IR/Function.h"

20#include "llvm/IR/GlobalValue.h"

21#include "llvm/IR/IntrinsicsAMDGPU.h"

22#include "llvm/IR/IntrinsicsR600.h"

23#include "llvm/IR/LLVMContext.h"

24#include "llvm/MC/MCInstrInfo.h"

25#include "llvm/MC/MCRegisterInfo.h"

26#include "llvm/MC/MCSubtargetInfo.h"

27#include "llvm/Support/CommandLine.h"

28#include "llvm/TargetParser/TargetParser.h"

29#include <optional>

31#define GET_INSTRINFO_NAMED_OPS

32#define GET_INSTRMAP_INFO

33#include "AMDGPUGenInstrInfo.inc"

35staticllvm::cl::opt<unsigned>DefaultAMDHSACodeObjectVersion(

36"amdhsa-code-object-version",llvm::cl::Hidden,

37llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),

38llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "

39"or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask is built in unsigned arithmetic so that wide fields do not
/// overflow a signed int. A \p Width covering the whole word (or more) yields
/// an all-ones mask before shifting. \p Shift must be smaller than the number
/// of bits in `unsigned`.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned WordBits = sizeof(unsigned) * 8;
  const unsigned LowMask = Width >= WordBits ? ~0u : (1u << Width) - 1u;
  return LowMask << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Bits of \p Src that do not fit into the field are discarded; bits of
/// \p Dst outside the field are preserved.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, shifted down to bit 0.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits): 10 for \p VersionMajor >= 11,
/// otherwise 0.
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits): 6 for \p VersionMajor >= 11,
/// otherwise 4.
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift: 0 for \p VersionMajor >= 11, otherwise 4.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width (3 for every \p VersionMajor).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift: 4 for \p VersionMajor >= 11, otherwise 8.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width: 6 for \p VersionMajor >= 10, otherwise 4.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits); 14 for every \p VersionMajor.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits): 2 for \p VersionMajor 9 and 10,
/// otherwise 0 (no high part).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width: 6 for \p VersionMajor >= 12, otherwise 0.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width: 6 for \p VersionMajor >= 12, otherwise 0.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width: 3 for \p VersionMajor >= 12, otherwise 0.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width: 6 for \p VersionMajor >= 12, otherwise 0.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor: 6 for
/// \p VersionMajor >= 10, otherwise 0.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width: 5 for \p VersionMajor >= 12, otherwise 0.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions:
/// 8 for \p VersionMajor >= 12, otherwise 0.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end anonymous namespace

156

157namespacellvm {

158

159namespaceAMDGPU {

160

161/// \returns true if the target supports signed immediate offset for SMRD

162/// instructions.

163boolhasSMRDSignedImmOffset(constMCSubtargetInfo &ST) {

164returnisGFX9Plus(ST);

165}

166

167/// \returns True if \p STI is AMDHSA.

168boolisHsaAbi(constMCSubtargetInfo &STI) {

169return STI.getTargetTriple().getOS() ==Triple::AMDHSA;

170}

171

172unsignedgetAMDHSACodeObjectVersion(constModule &M) {

173if (auto *Ver = mdconst::extract_or_null<ConstantInt>(

174 M.getModuleFlag("amdhsa_code_object_version"))) {

175return (unsigned)Ver->getZExtValue() / 100;

176 }

177

178returngetDefaultAMDHSACodeObjectVersion();

179}

180

181unsignedgetDefaultAMDHSACodeObjectVersion() {

182returnDefaultAMDHSACodeObjectVersion;

183}

184

185unsignedgetAMDHSACodeObjectVersion(unsigned ABIVersion) {

186switch (ABIVersion) {

187caseELF::ELFABIVERSION_AMDGPU_HSA_V4:

188return 4;

189caseELF::ELFABIVERSION_AMDGPU_HSA_V5:

190return 5;

191caseELF::ELFABIVERSION_AMDGPU_HSA_V6:

192return 6;

193default:

194returngetDefaultAMDHSACodeObjectVersion();

195 }

196}

197

198uint8_t getELFABIVersion(constTriple &T,unsigned CodeObjectVersion) {

199if (T.getOS() !=Triple::AMDHSA)

200return 0;

201

202switch (CodeObjectVersion) {

203case 4:

204returnELF::ELFABIVERSION_AMDGPU_HSA_V4;

205case 5:

206returnELF::ELFABIVERSION_AMDGPU_HSA_V5;

207case 6:

208returnELF::ELFABIVERSION_AMDGPU_HSA_V6;

209default:

210report_fatal_error("Unsupported AMDHSA Code Object Version " +

211Twine(CodeObjectVersion));

212 }

213}

214

215unsignedgetMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {

216switch (CodeObjectVersion) {

217caseAMDHSA_COV4:

218return 48;

219caseAMDHSA_COV5:

220caseAMDHSA_COV6:

221default:

222returnAMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;

223 }

224}

225

226

227// FIXME: All such magic numbers about the ABI should be in a

228// central TD file.

229unsignedgetHostcallImplicitArgPosition(unsigned CodeObjectVersion) {

230switch (CodeObjectVersion) {

231caseAMDHSA_COV4:

232return 24;

233caseAMDHSA_COV5:

234caseAMDHSA_COV6:

235default:

236returnAMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;

237 }

238}

239

240unsignedgetDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {

241switch (CodeObjectVersion) {

242caseAMDHSA_COV4:

243return 32;

244caseAMDHSA_COV5:

245caseAMDHSA_COV6:

246default:

247returnAMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;

248 }

249}

250

251unsignedgetCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {

252switch (CodeObjectVersion) {

253caseAMDHSA_COV4:

254return 40;

255caseAMDHSA_COV5:

256caseAMDHSA_COV6:

257default:

258returnAMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;

259 }

260}

261

262#define GET_MIMGBaseOpcodesTable_IMPL

263#define GET_MIMGDimInfoTable_IMPL

264#define GET_MIMGInfoTable_IMPL

265#define GET_MIMGLZMappingTable_IMPL

266#define GET_MIMGMIPMappingTable_IMPL

267#define GET_MIMGBiasMappingTable_IMPL

268#define GET_MIMGOffsetMappingTable_IMPL

269#define GET_MIMGG16MappingTable_IMPL

270#define GET_MAIInstInfoTable_IMPL

271#include "AMDGPUGenSearchableTables.inc"

272

273intgetMIMGOpcode(unsigned BaseOpcode,unsigned MIMGEncoding,

274unsigned VDataDwords,unsigned VAddrDwords) {

275constMIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,

276 VDataDwords, VAddrDwords);

277returnInfo ?Info->Opcode : -1;

278}

279

280constMIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {

281constMIMGInfo *Info =getMIMGInfo(Opc);

282returnInfo ?getMIMGBaseOpcodeInfo(Info->BaseOpcode) :nullptr;

283}

284

285intgetMaskedMIMGOp(unsigned Opc,unsigned NewChannels) {

286constMIMGInfo *OrigInfo =getMIMGInfo(Opc);

287constMIMGInfo *NewInfo =

288 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,

289 NewChannels, OrigInfo->VAddrDwords);

290return NewInfo ? NewInfo->Opcode : -1;

291}

292

293unsignedgetAddrSizeMIMGOp(constMIMGBaseOpcodeInfo *BaseOpcode,

294constMIMGDimInfo *Dim,bool IsA16,

295bool IsG16Supported) {

296unsigned AddrWords = BaseOpcode->NumExtraArgs;

297unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +

298 (BaseOpcode->LodOrClampOrMip ? 1 : 0);

299if (IsA16)

300 AddrWords +=divideCeil(AddrComponents, 2);

301else

302 AddrWords += AddrComponents;

303

304// Note: For subtargets that support A16 but not G16, enabling A16 also

305// enables 16 bit gradients.

306// For subtargets that support A16 (operand) and G16 (done with a different

307// instruction encoding), they are independent.

308

309if (BaseOpcode->Gradients) {

310if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)

311// There are two gradients per coordinate, we pack them separately.

312// For the 3d case,

313// we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)

314 AddrWords += alignTo<2>(Dim->NumGradients / 2);

315else

316 AddrWords += Dim->NumGradients;

317 }

318return AddrWords;

319}

320

/// Record describing one MUBUF opcode, as returned by the MUBUF lookup
/// helpers in this file.
// NOTE(review): field order presumably has to match the generated
// searchable table — confirm before reordering.
struct MUBUFInfo {
  uint16_t Opcode;     // Concrete opcode.
  uint16_t BaseOpcode; // Base opcode this entry belongs to.
  uint8_t elements;    // Element count (see getMUBUFElements).
  bool has_vaddr;      // See getMUBUFHasVAddr.
  bool has_srsrc;      // See getMUBUFHasSrsrc.
  bool has_soffset;    // See getMUBUFHasSoffset.
  bool IsBufferInv;    // See getMUBUFIsBufferInv.
  bool tfe;            // See getMUBUFTfe.
};

331

/// Record describing one MTBUF opcode, as returned by the MTBUF lookup
/// helpers in this file.
// NOTE(review): field order presumably has to match the generated
// searchable table — confirm before reordering.
struct MTBUFInfo {
  uint16_t Opcode;     // Concrete opcode.
  uint16_t BaseOpcode; // Base opcode this entry belongs to.
  uint8_t elements;    // Element count (see getMTBUFElements).
  bool has_vaddr;      // See getMTBUFHasVAddr.
  bool has_srsrc;      // See getMTBUFHasSrsrc.
  bool has_soffset;    // See getMTBUFHasSoffset.
};

340

/// Record describing one scalar-memory opcode (see getSMEMIsBuffer).
struct SMInfo {
  uint16_t Opcode; // Opcode the entry applies to.
  bool IsBuffer;   // Value reported by getSMEMIsBuffer.
};

345

/// Record describing one VOP1/VOP2/VOP3 opcode (see getVOP*IsSingle).
struct VOPInfo {
  uint16_t Opcode; // Opcode the entry applies to.
  bool IsSingle;   // Value reported by the getVOP*IsSingle queries.
};

350

/// Record holding a single opcode; used for the VOPC64 DPP tables.
struct VOPC64DPPInfo {
  uint16_t Opcode; // Opcode listed in the table.
};

354

/// Record holding a single opcode; used for the VOPC asm-only table.
struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode; // Opcode listed in the table.
};

358

/// Record holding a single opcode; used for the VOP3C asm-only table.
struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode; // Opcode listed in the table.
};

362

/// Record relating a VOP opcode to its VOPD component encoding.
struct VOPDComponentInfo {
  uint16_t BaseVOP; // Base VOP opcode (returned by getVOPDComponents).
  uint16_t VOPDOp;  // VOPD component opcode (returned by getVOPDOpcode).
  bool CanBeVOPDX;  // First field of the result of getCanBeVOPD.
};

368

/// Record describing one combined VOPD opcode and its two components.
struct VOPDInfo {
  uint16_t Opcode;    // Combined VOPD opcode.
  uint16_t OpX;       // Component opcode of the X half.
  uint16_t OpY;       // Component opcode of the Y half.
  uint16_t Subtarget; // Encoding family the entry belongs to — TODO confirm.
};

375

/// Record describing whether an opcode is a true16 instruction
/// (see isTrue16Inst).
struct VOPTrue16Info {
  uint16_t Opcode; // Opcode the entry applies to.
  bool IsTrue16;   // Value reported by isTrue16Inst.
};

380

381#define GET_FP4FP8DstByteSelTable_DECL

382#define GET_FP4FP8DstByteSelTable_IMPL

383

/// Record pairing an opcode with a DPMACC-instruction flag.
struct DPMACCInstructionInfo {
  uint16_t Opcode;          // Opcode the entry applies to.
  bool IsDPMACCInstruction; // Flag stored for that opcode.
};

388

/// Record describing the FP8/FP4 destination byte-select capability of an
/// opcode (see getFPDstSelType).
struct FP4FP8DstByteSelInfo {
  uint16_t Opcode;       // Opcode the entry applies to.
  bool HasFP8DstByteSel; // Makes getFPDstSelType report FPType::FP8.
  bool HasFP4DstByteSel; // Makes getFPDstSelType report FPType::FP4.
};

394

395#define GET_MTBUFInfoTable_DECL

396#define GET_MTBUFInfoTable_IMPL

397#define GET_MUBUFInfoTable_DECL

398#define GET_MUBUFInfoTable_IMPL

399#define GET_SMInfoTable_DECL

400#define GET_SMInfoTable_IMPL

401#define GET_VOP1InfoTable_DECL

402#define GET_VOP1InfoTable_IMPL

403#define GET_VOP2InfoTable_DECL

404#define GET_VOP2InfoTable_IMPL

405#define GET_VOP3InfoTable_DECL

406#define GET_VOP3InfoTable_IMPL

407#define GET_VOPC64DPPTable_DECL

408#define GET_VOPC64DPPTable_IMPL

409#define GET_VOPC64DPP8Table_DECL

410#define GET_VOPC64DPP8Table_IMPL

411#define GET_VOPCAsmOnlyInfoTable_DECL

412#define GET_VOPCAsmOnlyInfoTable_IMPL

413#define GET_VOP3CAsmOnlyInfoTable_DECL

414#define GET_VOP3CAsmOnlyInfoTable_IMPL

415#define GET_VOPDComponentTable_DECL

416#define GET_VOPDComponentTable_IMPL

417#define GET_VOPDPairs_DECL

418#define GET_VOPDPairs_IMPL

419#define GET_VOPTrue16Table_DECL

420#define GET_VOPTrue16Table_IMPL

421#define GET_WMMAOpcode2AddrMappingTable_DECL

422#define GET_WMMAOpcode2AddrMappingTable_IMPL

423#define GET_WMMAOpcode3AddrMappingTable_DECL

424#define GET_WMMAOpcode3AddrMappingTable_IMPL

425#define GET_getMFMA_F8F6F4_WithSize_DECL

426#define GET_getMFMA_F8F6F4_WithSize_IMPL

427#define GET_isMFMA_F8F6F4Table_IMPL

428#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

429

430#include "AMDGPUGenSearchableTables.inc"

431

432intgetMTBUFBaseOpcode(unsigned Opc) {

433constMTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);

434returnInfo ?Info->BaseOpcode : -1;

435}

436

437intgetMTBUFOpcode(unsigned BaseOpc,unsigned Elements) {

438constMTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);

439returnInfo ?Info->Opcode : -1;

440}

441

442intgetMTBUFElements(unsigned Opc) {

443constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);

444returnInfo ?Info->elements : 0;

445}

446

447boolgetMTBUFHasVAddr(unsigned Opc) {

448constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);

449returnInfo ?Info->has_vaddr :false;

450}

451

452boolgetMTBUFHasSrsrc(unsigned Opc) {

453constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);

454returnInfo ?Info->has_srsrc :false;

455}

456

457boolgetMTBUFHasSoffset(unsigned Opc) {

458constMTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);

459returnInfo ?Info->has_soffset :false;

460}

461

462intgetMUBUFBaseOpcode(unsigned Opc) {

463constMUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);

464returnInfo ?Info->BaseOpcode : -1;

465}

466

467intgetMUBUFOpcode(unsigned BaseOpc,unsigned Elements) {

468constMUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);

469returnInfo ?Info->Opcode : -1;

470}

471

472intgetMUBUFElements(unsigned Opc) {

473constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);

474returnInfo ?Info->elements : 0;

475}

476

477boolgetMUBUFHasVAddr(unsigned Opc) {

478constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);

479returnInfo ?Info->has_vaddr :false;

480}

481

482boolgetMUBUFHasSrsrc(unsigned Opc) {

483constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);

484returnInfo ?Info->has_srsrc :false;

485}

486

487boolgetMUBUFHasSoffset(unsigned Opc) {

488constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);

489returnInfo ?Info->has_soffset :false;

490}

491

492boolgetMUBUFIsBufferInv(unsigned Opc) {

493constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);

494returnInfo ?Info->IsBufferInv :false;

495}

496

497boolgetMUBUFTfe(unsigned Opc) {

498constMUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);

499returnInfo ?Info->tfe :false;

500}

501

502boolgetSMEMIsBuffer(unsigned Opc) {

503constSMInfo *Info = getSMEMOpcodeHelper(Opc);

504returnInfo ?Info->IsBuffer :false;

505}

506

507boolgetVOP1IsSingle(unsigned Opc) {

508constVOPInfo *Info = getVOP1OpcodeHelper(Opc);

509returnInfo ?Info->IsSingle :true;

510}

511

512boolgetVOP2IsSingle(unsigned Opc) {

513constVOPInfo *Info = getVOP2OpcodeHelper(Opc);

514returnInfo ?Info->IsSingle :true;

515}

516

517boolgetVOP3IsSingle(unsigned Opc) {

518constVOPInfo *Info = getVOP3OpcodeHelper(Opc);

519returnInfo ?Info->IsSingle :true;

520}

521

522boolisVOPC64DPP(unsigned Opc) {

523return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);

524}

525

526boolisVOPCAsmOnly(unsigned Opc) {return isVOPCAsmOnlyOpcodeHelper(Opc); }

527

528boolgetMAIIsDGEMM(unsigned Opc) {

529constMAIInstInfo *Info = getMAIInstInfoHelper(Opc);

530returnInfo ?Info->is_dgemm :false;

531}

532

533boolgetMAIIsGFX940XDL(unsigned Opc) {

534constMAIInstInfo *Info = getMAIInstInfoHelper(Opc);

535returnInfo ?Info->is_gfx940_xdl :false;

536}

537

538uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {

539switch (EncodingVal) {

540caseMFMAScaleFormats::FP6_E2M3:

541caseMFMAScaleFormats::FP6_E3M2:

542return 6;

543caseMFMAScaleFormats::FP4_E2M1:

544return 4;

545caseMFMAScaleFormats::FP8_E4M3:

546caseMFMAScaleFormats::FP8_E5M2:

547default:

548return 8;

549 }

550

551llvm_unreachable("covered switch over mfma scale formats");

552}

553

554constMFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,

555unsigned BLGP,

556unsigned F8F8Opcode) {

557uint8_t SrcANumRegs =mfmaScaleF8F6F4FormatToNumRegs(CBSZ);

558uint8_t SrcBNumRegs =mfmaScaleF8F6F4FormatToNumRegs(BLGP);

559return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);

560}

561

562unsignedgetVOPDEncodingFamily(constMCSubtargetInfo &ST) {

563if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))

564returnSIEncodingFamily::GFX12;

565if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))

566returnSIEncodingFamily::GFX11;

567llvm_unreachable("Subtarget generation does not support VOPD!");

568}

569

570CanBeVOPD getCanBeVOPD(unsigned Opc) {

571constVOPDComponentInfo *Info = getVOPDComponentHelper(Opc);

572if (Info)

573return {Info->CanBeVOPDX,true};

574return {false,false};

575}

576

577unsignedgetVOPDOpcode(unsigned Opc) {

578constVOPDComponentInfo *Info = getVOPDComponentHelper(Opc);

579returnInfo ?Info->VOPDOp : ~0u;

580}

581

582boolisVOPD(unsigned Opc) {

583returnAMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);

584}

585

586boolisMAC(unsigned Opc) {

587return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||

588 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||

589 Opc == AMDGPU::V_MAC_F32_e64_vi ||

590 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||

591 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||

592 Opc == AMDGPU::V_MAC_F16_e64_vi ||

593 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||

594 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||

595 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||

596 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||

597 Opc == AMDGPU::V_FMAC_F32_e64_vi ||

598 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||

599 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||

600 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||

601 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||

602 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||

603 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||

604 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||

605 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||

606 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||

607 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;

608}

609

610boolisPermlane16(unsigned Opc) {

611return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||

612 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||

613 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||

614 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||

615 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||

616 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||

617 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||

618 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;

619}

620

621boolisCvt_F32_Fp8_Bf8_e64(unsigned Opc) {

622return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||

623 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||

624 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||

625 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||

626 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||

627 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||

628 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||

629 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||

630 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||

631 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;

632}

633

634boolisGenericAtomic(unsigned Opc) {

635return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||

636 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||

637 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||

638 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||

639 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||

640 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||

641 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||

642 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||

643 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||

644 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||

645 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||

646 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||

647 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||

648 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||

649 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||

650 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||

651 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;

652}

653

654boolisTrue16Inst(unsigned Opc) {

655constVOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);

656returnInfo ?Info->IsTrue16 :false;

657}

658

659FPType getFPDstSelType(unsigned Opc) {

660constFP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);

661if (!Info)

662returnFPType::None;

663if (Info->HasFP8DstByteSel)

664returnFPType::FP8;

665if (Info->HasFP4DstByteSel)

666returnFPType::FP4;

667

668returnFPType::None;

669}

670

671unsignedmapWMMA2AddrTo3AddrOpcode(unsigned Opc) {

672constWMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);

673returnInfo ?Info->Opcode3Addr : ~0u;

674}

675

676unsignedmapWMMA3AddrTo2AddrOpcode(unsigned Opc) {

677constWMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);

678returnInfo ?Info->Opcode2Addr : ~0u;

679}

680

681// Wrapper for Tablegen'd function. enum Subtarget is not defined in any

682// header files, so we need to wrap it in a function that takes unsigned

683// instead.

684intgetMCOpcode(uint16_t Opcode,unsigned Gen) {

685return getMCOpcodeGen(Opcode,static_cast<Subtarget>(Gen));

686}

687

688intgetVOPDFull(unsigned OpX,unsigned OpY,unsigned EncodingFamily) {

689constVOPDInfo *Info =

690 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);

691returnInfo ?Info->Opcode : -1;

692}

693

694std::pair<unsigned, unsigned>getVOPDComponents(unsigned VOPDOpcode) {

695constVOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);

696assert(Info);

697constauto *OpX = getVOPDBaseFromComponent(Info->OpX);

698constauto *OpY = getVOPDBaseFromComponent(Info->OpY);

699assert(OpX && OpY);

700return {OpX->BaseVOP, OpY->BaseVOP};

701}

702

703namespaceVOPD {

704

705ComponentProps::ComponentProps(constMCInstrDesc &OpDesc) {

706assert(OpDesc.getNumDefs() ==Component::DST_NUM);

707

708assert(OpDesc.getOperandConstraint(Component::SRC0,MCOI::TIED_TO) == -1);

709assert(OpDesc.getOperandConstraint(Component::SRC1,MCOI::TIED_TO) == -1);

710auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2,MCOI::TIED_TO);

711assert(TiedIdx == -1 || TiedIdx ==Component::DST);

712 HasSrc2Acc = TiedIdx != -1;

713

714 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();

715assert(SrcOperandsNum <=Component::MAX_SRC_NUM);

716

717auto OperandsNum = OpDesc.getNumOperands();

718unsigned CompOprIdx;

719for (CompOprIdx =Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {

720if (OpDesc.operands()[CompOprIdx].OperandType ==AMDGPU::OPERAND_KIMM32) {

721 MandatoryLiteralIdx = CompOprIdx;

722break;

723 }

724 }

725}

726

727unsignedComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const{

728assert(CompOprIdx <Component::MAX_OPR_NUM);

729

730if (CompOprIdx ==Component::DST)

731returngetIndexOfDstInParsedOperands();

732

733auto CompSrcIdx = CompOprIdx -Component::DST_NUM;

734if (CompSrcIdx <getCompParsedSrcOperandsNum())

735returngetIndexOfSrcInParsedOperands(CompSrcIdx);

736

737// The specified operand does not exist.

738return 0;

739}

740

741std::optional<unsigned>InstInfo::getInvalidCompOperandIndex(

742 std::function<unsigned(unsigned,unsigned)> GetRegIdx,bool SkipSrc) const{

743

744auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);

745auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

746

747constunsigned CompOprNum =

748 SkipSrc ?Component::DST_NUM :Component::MAX_OPR_NUM;

749unsigned CompOprIdx;

750for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {

751unsigned BanksMasks =VOPD_VGPR_BANK_MASKS[CompOprIdx];

752if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&

753 ((OpXRegs[CompOprIdx] & BanksMasks) ==

754 (OpYRegs[CompOprIdx] & BanksMasks)))

755return CompOprIdx;

756 }

757

758return {};

759}

760

761// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used

762// by the specified component. If an operand is unused

763// or is not a VGPR, the corresponding value is 0.

764//

765// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index

766// for the specified component and MC operand. The callback must return 0

767// if the operand is not a register or not a VGPR.

768InstInfo::RegIndices InstInfo::getRegIndices(

769unsigned CompIdx,

770 std::function<unsigned(unsigned,unsigned)> GetRegIdx) const{

771assert(CompIdx <COMPONENTS_NUM);

772

773constauto &Comp = CompInfo[CompIdx];

774InstInfo::RegIndices RegIndices;

775

776RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

777

778for (unsigned CompOprIdx : {SRC0,SRC1,SRC2}) {

779unsigned CompSrcIdx = CompOprIdx -DST_NUM;

780RegIndices[CompOprIdx] =

781 Comp.hasRegSrcOperand(CompSrcIdx)

782 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))

783 : 0;

784 }

785returnRegIndices;

786}

787

788}// namespace VOPD

789

790VOPD::InstInfo getVOPDInstInfo(constMCInstrDesc &OpX,constMCInstrDesc &OpY) {

791returnVOPD::InstInfo(OpX, OpY);

792}

793

794VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,

795constMCInstrInfo *InstrInfo) {

796auto [OpX, OpY] =getVOPDComponents(VOPDOpcode);

797constauto &OpXDesc = InstrInfo->get(OpX);

798constauto &OpYDesc = InstrInfo->get(OpY);

799VOPD::ComponentInfo OpXInfo(OpXDesc,VOPD::ComponentKind::COMPONENT_X);

800VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);

801returnVOPD::InstInfo(OpXInfo, OpYInfo);

802}

803

804namespaceIsaInfo {

805

806AMDGPUTargetID::AMDGPUTargetID(constMCSubtargetInfo &STI)

807 : STI(STI), XnackSetting(TargetIDSetting::Any),

808 SramEccSetting(TargetIDSetting::Any) {

809if (!STI.getFeatureBits().test(FeatureSupportsXNACK))

810 XnackSetting =TargetIDSetting::Unsupported;

811if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))

812 SramEccSetting =TargetIDSetting::Unsupported;

813}

814

815voidAMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {

816// Check if xnack or sramecc is explicitly enabled or disabled. In the

817// absence of the target features we assume we must generate code that can run

818// in any environment.

819SubtargetFeatures Features(FS);

820 std::optional<bool> XnackRequested;

821 std::optional<bool> SramEccRequested;

822

823for (const std::string &Feature : Features.getFeatures()) {

824if (Feature =="+xnack")

825 XnackRequested =true;

826elseif (Feature =="-xnack")

827 XnackRequested =false;

828elseif (Feature =="+sramecc")

829 SramEccRequested =true;

830elseif (Feature =="-sramecc")

831 SramEccRequested =false;

832 }

833

834bool XnackSupported =isXnackSupported();

835bool SramEccSupported =isSramEccSupported();

836

837if (XnackRequested) {

838if (XnackSupported) {

839 XnackSetting =

840 *XnackRequested ?TargetIDSetting::On :TargetIDSetting::Off;

841 }else {

842// If a specific xnack setting was requested and this GPU does not support

843// xnack emit a warning. Setting will remain set to "Unsupported".

844if (*XnackRequested) {

845errs() <<"warning: xnack 'On' was requested for a processor that does "

846"not support it!\n";

847 }else {

848errs() <<"warning: xnack 'Off' was requested for a processor that "

849"does not support it!\n";

850 }

851 }

852 }

853

854if (SramEccRequested) {

855if (SramEccSupported) {

856 SramEccSetting =

857 *SramEccRequested ?TargetIDSetting::On :TargetIDSetting::Off;

858 }else {

859// If a specific sramecc setting was requested and this GPU does not

860// support sramecc emit a warning. Setting will remain set to

861// "Unsupported".

862if (*SramEccRequested) {

863errs() <<"warning: sramecc 'On' was requested for a processor that "

864"does not support it!\n";

865 }else {

866errs() <<"warning: sramecc 'Off' was requested for a processor that "

867"does not support it!\n";

868 }

869 }

870 }

871}

872

873staticTargetIDSetting

874getTargetIDSettingFromFeatureString(StringRef FeatureString) {

875if (FeatureString.ends_with("-"))

876returnTargetIDSetting::Off;

877if (FeatureString.ends_with("+"))

878returnTargetIDSetting::On;

879

880llvm_unreachable("Malformed feature string");

881}

882

883voidAMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {

884SmallVector<StringRef, 3> TargetIDSplit;

885 TargetID.split(TargetIDSplit,':');

886

887for (constauto &FeatureString : TargetIDSplit) {

888if (FeatureString.starts_with("xnack"))

889 XnackSetting =getTargetIDSettingFromFeatureString(FeatureString);

890if (FeatureString.starts_with("sramecc"))

891 SramEccSetting =getTargetIDSettingFromFeatureString(FeatureString);

892 }

893}

894

895std::stringAMDGPUTargetID::toString() const{

896 std::string StringRep;

897raw_string_ostream StreamRep(StringRep);

898

899auto TargetTriple = STI.getTargetTriple();

900autoVersion =getIsaVersion(STI.getCPU());

901

902 StreamRep << TargetTriple.getArchName() <<'-'

903 << TargetTriple.getVendorName() <<'-'

904 << TargetTriple.getOSName() <<'-'

905 << TargetTriple.getEnvironmentName() <<'-';

906

907 std::string Processor;

908// TODO: Following else statement is present here because we used various

909// alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').

910// Remove once all aliases are removed from GCNProcessors.td.

911if (Version.Major >= 9)

912 Processor = STI.getCPU().str();

913else

914 Processor = (Twine("gfx") +Twine(Version.Major) +Twine(Version.Minor) +

915Twine(Version.Stepping))

916 .str();

917

918 std::string Features;

919if (STI.getTargetTriple().getOS() ==Triple::AMDHSA) {

920// sramecc.

921if (getSramEccSetting() ==TargetIDSetting::Off)

922 Features +=":sramecc-";

923elseif (getSramEccSetting() ==TargetIDSetting::On)

924 Features +=":sramecc+";

925// xnack.

926if (getXnackSetting() ==TargetIDSetting::Off)

927 Features +=":xnack-";

928elseif (getXnackSetting() ==TargetIDSetting::On)

929 Features +=":xnack+";

930 }

931

932 StreamRep << Processor << Features;

933

934return StringRep;

935}

936

937unsignedgetWavefrontSize(constMCSubtargetInfo *STI) {

938if (STI->getFeatureBits().test(FeatureWavefrontSize16))

939return 16;

940if (STI->getFeatureBits().test(FeatureWavefrontSize32))

941return 32;

942

943return 64;

944}

945

946unsignedgetLocalMemorySize(constMCSubtargetInfo *STI) {

947unsigned BytesPerCU =getAddressableLocalMemorySize(STI);

948

949// "Per CU" really means "per whatever functional block the waves of a

950// workgroup must share". So the effective local memory size is doubled in

951// WGP mode on gfx10.

952if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))

953 BytesPerCU *= 2;

954

955return BytesPerCU;

956}

957

958unsignedgetAddressableLocalMemorySize(constMCSubtargetInfo *STI) {

959if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))

960return 32768;

961if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))

962return 65536;

963if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))

964return 163840;

965return 0;

966}

967

968unsignedgetEUsPerCU(constMCSubtargetInfo *STI) {

969// "Per CU" really means "per whatever functional block the waves of a

970// workgroup must share". For gfx10 in CU mode this is the CU, which contains

971// two SIMDs.

972if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))

973return 2;

974// Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains

975// two CUs, so a total of four SIMDs.

976return 4;

977}

978

979unsignedgetMaxWorkGroupsPerCU(constMCSubtargetInfo *STI,

980unsigned FlatWorkGroupSize) {

981assert(FlatWorkGroupSize != 0);

982if (STI->getTargetTriple().getArch() !=Triple::amdgcn)

983return 8;

984unsigned MaxWaves =getMaxWavesPerEU(STI) *getEUsPerCU(STI);

985unsignedN =getWavesPerWorkGroup(STI, FlatWorkGroupSize);

986if (N == 1) {

987// Single-wave workgroups don't consume barrier resources.

988return MaxWaves;

989 }

990

991unsigned MaxBarriers = 16;

992if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))

993 MaxBarriers = 32;

994

995return std::min(MaxWaves /N, MaxBarriers);

996}

997

998unsignedgetMinWavesPerEU(constMCSubtargetInfo *STI) {

999return 1;

1000}

1001

1002unsignedgetMaxWavesPerEU(constMCSubtargetInfo *STI) {

1003// FIXME: Need to take scratch memory into account.

1004if (isGFX90A(*STI))

1005return 8;

1006if (!isGFX10Plus(*STI))

1007return 10;

1008returnhasGFX10_3Insts(*STI) ? 16 : 20;

1009}

1010

1011unsignedgetWavesPerEUForWorkGroup(constMCSubtargetInfo *STI,

1012unsigned FlatWorkGroupSize) {

1013returndivideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),

1014getEUsPerCU(STI));

1015}

1016

1017unsignedgetMinFlatWorkGroupSize(constMCSubtargetInfo *STI) {

1018return 1;

1019}

1020

1021unsignedgetMaxFlatWorkGroupSize(constMCSubtargetInfo *STI) {

1022// Some subtargets allow encoding 2048, but this isn't tested or supported.

1023return 1024;

1024}

1025

1026unsignedgetWavesPerWorkGroup(constMCSubtargetInfo *STI,

1027unsigned FlatWorkGroupSize) {

1028returndivideCeil(FlatWorkGroupSize,getWavefrontSize(STI));

1029}

1030

1031unsignedgetSGPRAllocGranule(constMCSubtargetInfo *STI) {

1032IsaVersion Version =getIsaVersion(STI->getCPU());

1033if (Version.Major >= 10)

1034returngetAddressableNumSGPRs(STI);

1035if (Version.Major >= 8)

1036return 16;

1037return 8;

1038}

1039

1040unsignedgetSGPREncodingGranule(constMCSubtargetInfo *STI) {

1041return 8;

1042}

1043

1044unsignedgetTotalNumSGPRs(constMCSubtargetInfo *STI) {

1045IsaVersion Version =getIsaVersion(STI->getCPU());

1046if (Version.Major >= 8)

1047return 800;

1048return 512;

1049}

1050

1051unsignedgetAddressableNumSGPRs(constMCSubtargetInfo *STI) {

1052if (STI->getFeatureBits().test(FeatureSGPRInitBug))

1053returnFIXED_NUM_SGPRS_FOR_INIT_BUG;

1054

1055IsaVersion Version =getIsaVersion(STI->getCPU());

1056if (Version.Major >= 10)

1057return 106;

1058if (Version.Major >= 8)

1059return 102;

1060return 104;

1061}

1062

1063unsignedgetMinNumSGPRs(constMCSubtargetInfo *STI,unsigned WavesPerEU) {

1064assert(WavesPerEU != 0);

1065

1066IsaVersion Version =getIsaVersion(STI->getCPU());

1067if (Version.Major >= 10)

1068return 0;

1069

1070if (WavesPerEU >=getMaxWavesPerEU(STI))

1071return 0;

1072

1073unsigned MinNumSGPRs =getTotalNumSGPRs(STI) / (WavesPerEU + 1);

1074if (STI->getFeatureBits().test(FeatureTrapHandler))

1075 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);

1076 MinNumSGPRs =alignDown(MinNumSGPRs,getSGPRAllocGranule(STI)) + 1;

1077return std::min(MinNumSGPRs,getAddressableNumSGPRs(STI));

1078}

1079

1080unsignedgetMaxNumSGPRs(constMCSubtargetInfo *STI,unsigned WavesPerEU,

1081bool Addressable) {

1082assert(WavesPerEU != 0);

1083

1084unsigned AddressableNumSGPRs =getAddressableNumSGPRs(STI);

1085IsaVersion Version =getIsaVersion(STI->getCPU());

1086if (Version.Major >= 10)

1087return Addressable ? AddressableNumSGPRs : 108;

1088if (Version.Major >= 8 && !Addressable)

1089 AddressableNumSGPRs = 112;

1090unsigned MaxNumSGPRs =getTotalNumSGPRs(STI) / WavesPerEU;

1091if (STI->getFeatureBits().test(FeatureTrapHandler))

1092 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);

1093 MaxNumSGPRs =alignDown(MaxNumSGPRs,getSGPRAllocGranule(STI));

1094return std::min(MaxNumSGPRs, AddressableNumSGPRs);

1095}

1096

1097unsignedgetNumExtraSGPRs(constMCSubtargetInfo *STI,bool VCCUsed,

1098bool FlatScrUsed,bool XNACKUsed) {

1099unsigned ExtraSGPRs = 0;

1100if (VCCUsed)

1101 ExtraSGPRs = 2;

1102

1103IsaVersion Version =getIsaVersion(STI->getCPU());

1104if (Version.Major >= 10)

1105return ExtraSGPRs;

1106

1107if (Version.Major < 8) {

1108if (FlatScrUsed)

1109 ExtraSGPRs = 4;

1110 }else {

1111if (XNACKUsed)

1112 ExtraSGPRs = 4;

1113

1114if (FlatScrUsed ||

1115 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))

1116 ExtraSGPRs = 6;

1117 }

1118

1119return ExtraSGPRs;

1120}

1121

1122unsignedgetNumExtraSGPRs(constMCSubtargetInfo *STI,bool VCCUsed,

1123bool FlatScrUsed) {

1124returngetNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,

1125 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));

1126}

1127

1128staticunsignedgetGranulatedNumRegisterBlocks(unsigned NumRegs,

1129unsigned Granule) {

1130returndivideCeil(std::max(1u, NumRegs), Granule);

1131}

1132

1133unsignedgetNumSGPRBlocks(constMCSubtargetInfo *STI,unsigned NumSGPRs) {

1134// SGPRBlocks is actual number of SGPR blocks minus 1.

1135returngetGranulatedNumRegisterBlocks(NumSGPRs,getSGPREncodingGranule(STI)) -

1136 1;

1137}

1138

1139unsignedgetVGPRAllocGranule(constMCSubtargetInfo *STI,

1140 std::optional<bool> EnableWavefrontSize32) {

1141if (STI->getFeatureBits().test(FeatureGFX90AInsts))

1142return 8;

1143

1144bool IsWave32 = EnableWavefrontSize32 ?

1145 *EnableWavefrontSize32 :

1146 STI->getFeatureBits().test(FeatureWavefrontSize32);

1147

1148if (STI->getFeatureBits().test(Feature1_5xVGPRs))

1149return IsWave32 ? 24 : 12;

1150

1151if (hasGFX10_3Insts(*STI))

1152return IsWave32 ? 16 : 8;

1153

1154return IsWave32 ? 8 : 4;

1155}

1156

1157unsignedgetVGPREncodingGranule(constMCSubtargetInfo *STI,

1158 std::optional<bool> EnableWavefrontSize32) {

1159if (STI->getFeatureBits().test(FeatureGFX90AInsts))

1160return 8;

1161

1162bool IsWave32 = EnableWavefrontSize32 ?

1163 *EnableWavefrontSize32 :

1164 STI->getFeatureBits().test(FeatureWavefrontSize32);

1165

1166return IsWave32 ? 8 : 4;

1167}

1168

1169unsignedgetTotalNumVGPRs(constMCSubtargetInfo *STI) {

1170if (STI->getFeatureBits().test(FeatureGFX90AInsts))

1171return 512;

1172if (!isGFX10Plus(*STI))

1173return 256;

1174bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);

1175if (STI->getFeatureBits().test(Feature1_5xVGPRs))

1176return IsWave32 ? 1536 : 768;

1177return IsWave32 ? 1024 : 512;

1178}

1179

1180unsignedgetAddressableNumArchVGPRs(constMCSubtargetInfo *STI) {return 256; }

1181

1182unsignedgetAddressableNumVGPRs(constMCSubtargetInfo *STI) {

1183if (STI->getFeatureBits().test(FeatureGFX90AInsts))

1184return 512;

1185returngetAddressableNumArchVGPRs(STI);

1186}

1187

1188unsignedgetNumWavesPerEUWithNumVGPRs(constMCSubtargetInfo *STI,

1189unsigned NumVGPRs) {

1190returngetNumWavesPerEUWithNumVGPRs(NumVGPRs,getVGPRAllocGranule(STI),

1191getMaxWavesPerEU(STI),

1192getTotalNumVGPRs(STI));

1193}

1194

1195unsignedgetNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs,unsigned Granule,

1196unsigned MaxWaves,

1197unsigned TotalNumVGPRs) {

1198if (NumVGPRs < Granule)

1199return MaxWaves;

1200unsigned RoundedRegs =alignTo(NumVGPRs, Granule);

1201return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);

1202}

1203

1204unsignedgetOccupancyWithNumSGPRs(unsigned SGPRs,unsigned MaxWaves,

1205AMDGPUSubtarget::Generation Gen) {

1206if (Gen >=AMDGPUSubtarget::GFX10)

1207return MaxWaves;

1208

1209if (Gen >=AMDGPUSubtarget::VOLCANIC_ISLANDS) {

1210if (SGPRs <= 80)

1211return 10;

1212if (SGPRs <= 88)

1213return 9;

1214if (SGPRs <= 100)

1215return 8;

1216return 7;

1217 }

1218if (SGPRs <= 48)

1219return 10;

1220if (SGPRs <= 56)

1221return 9;

1222if (SGPRs <= 64)

1223return 8;

1224if (SGPRs <= 72)

1225return 7;

1226if (SGPRs <= 80)

1227return 6;

1228return 5;

1229}

1230

1231unsignedgetMinNumVGPRs(constMCSubtargetInfo *STI,unsigned WavesPerEU) {

1232assert(WavesPerEU != 0);

1233

1234unsigned MaxWavesPerEU =getMaxWavesPerEU(STI);

1235if (WavesPerEU >= MaxWavesPerEU)

1236return 0;

1237

1238unsigned TotNumVGPRs =getTotalNumVGPRs(STI);

1239unsigned AddrsableNumVGPRs =getAddressableNumVGPRs(STI);

1240unsigned Granule =getVGPRAllocGranule(STI);

1241unsigned MaxNumVGPRs =alignDown(TotNumVGPRs / WavesPerEU, Granule);

1242

1243if (MaxNumVGPRs ==alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))

1244return 0;

1245

1246unsigned MinWavesPerEU =getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);

1247if (WavesPerEU < MinWavesPerEU)

1248returngetMinNumVGPRs(STI, MinWavesPerEU);

1249

1250unsigned MaxNumVGPRsNext =alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);

1251unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);

1252return std::min(MinNumVGPRs, AddrsableNumVGPRs);

1253}

1254

1255unsignedgetMaxNumVGPRs(constMCSubtargetInfo *STI,unsigned WavesPerEU) {

1256assert(WavesPerEU != 0);

1257

1258unsigned MaxNumVGPRs =alignDown(getTotalNumVGPRs(STI) / WavesPerEU,

1259getVGPRAllocGranule(STI));

1260unsigned AddressableNumVGPRs =getAddressableNumVGPRs(STI);

1261return std::min(MaxNumVGPRs, AddressableNumVGPRs);

1262}

1263

1264unsignedgetEncodedNumVGPRBlocks(constMCSubtargetInfo *STI,unsigned NumVGPRs,

1265 std::optional<bool> EnableWavefrontSize32) {

1266returngetGranulatedNumRegisterBlocks(

1267 NumVGPRs,getVGPREncodingGranule(STI, EnableWavefrontSize32)) -

1268 1;

1269}

1270

1271unsignedgetAllocatedNumVGPRBlocks(constMCSubtargetInfo *STI,

1272unsigned NumVGPRs,

1273 std::optional<bool> EnableWavefrontSize32) {

1274returngetGranulatedNumRegisterBlocks(

1275 NumVGPRs,getVGPRAllocGranule(STI, EnableWavefrontSize32));

1276}

1277}// end namespace IsaInfo

1278

1279voidinitDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,

1280constMCSubtargetInfo *STI) {

1281IsaVersion Version =getIsaVersion(STI->getCPU());

1282 KernelCode.amd_kernel_code_version_major = 1;

1283 KernelCode.amd_kernel_code_version_minor = 2;

1284 KernelCode.amd_machine_kind = 1;// AMD_MACHINE_KIND_AMDGPU

1285 KernelCode.amd_machine_version_major =Version.Major;

1286 KernelCode.amd_machine_version_minor =Version.Minor;

1287 KernelCode.amd_machine_version_stepping =Version.Stepping;

1288 KernelCode.kernel_code_entry_byte_offset =sizeof(amd_kernel_code_t);

1289if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {

1290 KernelCode.wavefront_size = 5;

1291 KernelCode.code_properties |=AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;

1292 }else {

1293 KernelCode.wavefront_size = 6;

1294 }

1295

1296// If the code object does not support indirect functions, then the value must

1297// be 0xffffffff.

1298 KernelCode.call_convention = -1;

1299

1300// These alignment values are specified in powers of two, so alignment =

1301// 2^n. The minimum alignment is 2^4 = 16.

1302 KernelCode.kernarg_segment_alignment = 4;

1303 KernelCode.group_segment_alignment = 4;

1304 KernelCode.private_segment_alignment = 4;

1305

1306if (Version.Major >= 10) {

1307 KernelCode.compute_pgm_resource_registers |=

1308S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |

1309S_00B848_MEM_ORDERED(1);

1310 }

1311}

1312

1313boolisGroupSegment(constGlobalValue *GV) {

1314return GV->getAddressSpace() ==AMDGPUAS::LOCAL_ADDRESS;

1315}

1316

1317boolisGlobalSegment(constGlobalValue *GV) {

1318return GV->getAddressSpace() ==AMDGPUAS::GLOBAL_ADDRESS;

1319}

1320

1321boolisReadOnlySegment(constGlobalValue *GV) {

1322unsigned AS = GV->getAddressSpace();

1323return AS ==AMDGPUAS::CONSTANT_ADDRESS ||

1324 AS ==AMDGPUAS::CONSTANT_ADDRESS_32BIT;

1325}

1326

1327boolshouldEmitConstantsToTextSection(constTriple &TT) {

1328return TT.getArch() ==Triple::r600;

1329}

1330

1331std::pair<unsigned, unsigned>

1332getIntegerPairAttribute(constFunction &F,StringRef Name,

1333 std::pair<unsigned, unsigned>Default,

1334bool OnlyFirstRequired) {

1335if (auto Attr =getIntegerPairAttribute(F,Name, OnlyFirstRequired))

1336return {Attr->first, Attr->second ? *(Attr->second) :Default.second};

1337returnDefault;

1338}

1339

1340std::optional<std::pair<unsigned, std::optional<unsigned>>>

1341getIntegerPairAttribute(constFunction &F,StringRef Name,

1342bool OnlyFirstRequired) {

1343Attribute A =F.getFnAttribute(Name);

1344if (!A.isStringAttribute())

1345return std::nullopt;

1346

1347LLVMContext &Ctx =F.getContext();

1348 std::pair<unsigned, std::optional<unsigned>> Ints;

1349 std::pair<StringRef, StringRef> Strs =A.getValueAsString().split(',');

1350if (Strs.first.trim().getAsInteger(0, Ints.first)) {

1351 Ctx.emitError("can't parse first integer attribute " +Name);

1352return std::nullopt;

1353 }

1354unsigned Second = 0;

1355if (Strs.second.trim().getAsInteger(0, Second)) {

1356if (!OnlyFirstRequired || !Strs.second.trim().empty()) {

1357 Ctx.emitError("can't parse second integer attribute " +Name);

1358return std::nullopt;

1359 }

1360 }else {

1361 Ints.second = Second;

1362 }

1363

1364return Ints;

1365}

1366

1367SmallVector<unsigned>getIntegerVecAttribute(constFunction &F,StringRef Name,

1368unsignedSize,

1369unsignedDefaultVal) {

1370assert(Size > 2);

1371SmallVector<unsigned>Default(Size,DefaultVal);

1372

1373Attribute A =F.getFnAttribute(Name);

1374if (!A.isStringAttribute())

1375returnDefault;

1376

1377SmallVector<unsigned> Vals(Size,DefaultVal);

1378

1379LLVMContext &Ctx =F.getContext();

1380

1381StringRef S =A.getValueAsString();

1382unsigned i = 0;

1383for (; !S.empty() && i <Size; i++) {

1384 std::pair<StringRef, StringRef> Strs = S.split(',');

1385unsigned IntVal;

1386if (Strs.first.trim().getAsInteger(0, IntVal)) {

1387 Ctx.emitError("can't parse integer attribute " + Strs.first +" in " +

1388Name);

1389returnDefault;

1390 }

1391 Vals[i] = IntVal;

1392 S = Strs.second;

1393 }

1394

1395if (!S.empty() || i <Size) {

1396 Ctx.emitError("attribute " +Name +

1397" has incorrect number of integers; expected " +

1398 llvm::utostr(Size));

1399returnDefault;

1400 }

1401return Vals;

1402}

1403

1404unsignedgetVmcntBitMask(constIsaVersion &Version) {

1405return (1 << (getVmcntBitWidthLo(Version.Major) +

1406 getVmcntBitWidthHi(Version.Major))) -

1407 1;

1408}

1409

1410unsignedgetLoadcntBitMask(constIsaVersion &Version) {

1411return (1 << getLoadcntBitWidth(Version.Major)) - 1;

1412}

1413

1414unsignedgetSamplecntBitMask(constIsaVersion &Version) {

1415return (1 << getSamplecntBitWidth(Version.Major)) - 1;

1416}

1417

1418unsignedgetBvhcntBitMask(constIsaVersion &Version) {

1419return (1 << getBvhcntBitWidth(Version.Major)) - 1;

1420}

1421

1422unsignedgetExpcntBitMask(constIsaVersion &Version) {

1423return (1 << getExpcntBitWidth(Version.Major)) - 1;

1424}

1425

1426unsignedgetLgkmcntBitMask(constIsaVersion &Version) {

1427return (1 << getLgkmcntBitWidth(Version.Major)) - 1;

1428}

1429

1430unsignedgetDscntBitMask(constIsaVersion &Version) {

1431return (1 << getDscntBitWidth(Version.Major)) - 1;

1432}

1433

1434unsignedgetKmcntBitMask(constIsaVersion &Version) {

1435return (1 << getKmcntBitWidth(Version.Major)) - 1;

1436}

1437

1438unsignedgetStorecntBitMask(constIsaVersion &Version) {

1439return (1 << getStorecntBitWidth(Version.Major)) - 1;

1440}

1441

1442unsignedgetWaitcntBitMask(constIsaVersion &Version) {

1443unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),

1444 getVmcntBitWidthLo(Version.Major));

1445unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),

1446 getExpcntBitWidth(Version.Major));

1447unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),

1448 getLgkmcntBitWidth(Version.Major));

1449unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),

1450 getVmcntBitWidthHi(Version.Major));

1451return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;

1452}

1453

1454unsigneddecodeVmcnt(constIsaVersion &Version,unsignedWaitcnt) {

1455unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),

1456 getVmcntBitWidthLo(Version.Major));

1457unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),

1458 getVmcntBitWidthHi(Version.Major));

1459return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);

1460}

1461

1462unsigneddecodeExpcnt(constIsaVersion &Version,unsignedWaitcnt) {

1463return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),

1464 getExpcntBitWidth(Version.Major));

1465}

1466

1467unsigneddecodeLgkmcnt(constIsaVersion &Version,unsignedWaitcnt) {

1468return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),

1469 getLgkmcntBitWidth(Version.Major));

1470}

1471

1472voiddecodeWaitcnt(constIsaVersion &Version,unsignedWaitcnt,

1473unsigned &Vmcnt,unsigned &Expcnt,unsigned &Lgkmcnt) {

1474 Vmcnt =decodeVmcnt(Version,Waitcnt);

1475 Expcnt =decodeExpcnt(Version,Waitcnt);

1476 Lgkmcnt =decodeLgkmcnt(Version,Waitcnt);

1477}

1478

1479Waitcnt decodeWaitcnt(constIsaVersion &Version,unsigned Encoded) {

1480Waitcnt Decoded;

1481 Decoded.LoadCnt =decodeVmcnt(Version, Encoded);

1482 Decoded.ExpCnt =decodeExpcnt(Version, Encoded);

1483 Decoded.DsCnt =decodeLgkmcnt(Version, Encoded);

1484return Decoded;

1485}

1486

1487unsignedencodeVmcnt(constIsaVersion &Version,unsignedWaitcnt,

1488unsigned Vmcnt) {

1489Waitcnt = packBits(Vmcnt,Waitcnt, getVmcntBitShiftLo(Version.Major),

1490 getVmcntBitWidthLo(Version.Major));

1491return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major),Waitcnt,

1492 getVmcntBitShiftHi(Version.Major),

1493 getVmcntBitWidthHi(Version.Major));

1494}

1495

1496unsignedencodeExpcnt(constIsaVersion &Version,unsignedWaitcnt,

1497unsigned Expcnt) {

1498return packBits(Expcnt,Waitcnt, getExpcntBitShift(Version.Major),

1499 getExpcntBitWidth(Version.Major));

1500}

1501

1502unsignedencodeLgkmcnt(constIsaVersion &Version,unsignedWaitcnt,

1503unsigned Lgkmcnt) {

1504return packBits(Lgkmcnt,Waitcnt, getLgkmcntBitShift(Version.Major),

1505 getLgkmcntBitWidth(Version.Major));

1506}

1507

1508unsignedencodeWaitcnt(constIsaVersion &Version,

1509unsigned Vmcnt,unsigned Expcnt,unsigned Lgkmcnt) {

1510unsignedWaitcnt =getWaitcntBitMask(Version);

1511Waitcnt =encodeVmcnt(Version,Waitcnt, Vmcnt);

1512Waitcnt =encodeExpcnt(Version,Waitcnt, Expcnt);

1513Waitcnt =encodeLgkmcnt(Version,Waitcnt, Lgkmcnt);

1514returnWaitcnt;

1515}

1516

1517unsignedencodeWaitcnt(constIsaVersion &Version,constWaitcnt &Decoded) {

1518returnencodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);

1519}

1520

1521staticunsignedgetCombinedCountBitMask(constIsaVersion &Version,

1522bool IsStore) {

1523unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),

1524 getDscntBitWidth(Version.Major));

1525if (IsStore) {

1526unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),

1527 getStorecntBitWidth(Version.Major));

1528return Dscnt | Storecnt;

1529 }

1530unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),

1531 getLoadcntBitWidth(Version.Major));

1532return Dscnt | Loadcnt;

1533}

1534

1535Waitcnt decodeLoadcntDscnt(constIsaVersion &Version,unsigned LoadcntDscnt) {

1536Waitcnt Decoded;

1537 Decoded.LoadCnt =

1538 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),

1539 getLoadcntBitWidth(Version.Major));

1540 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),

1541 getDscntBitWidth(Version.Major));

1542return Decoded;

1543}

1544

1545Waitcnt decodeStorecntDscnt(constIsaVersion &Version,unsigned StorecntDscnt) {

1546Waitcnt Decoded;

1547 Decoded.StoreCnt =

1548 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),

1549 getStorecntBitWidth(Version.Major));

1550 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),

1551 getDscntBitWidth(Version.Major));

1552return Decoded;

1553}

1554

1555staticunsignedencodeLoadcnt(constIsaVersion &Version,unsignedWaitcnt,

1556unsigned Loadcnt) {

1557return packBits(Loadcnt,Waitcnt, getLoadcntStorecntBitShift(Version.Major),

1558 getLoadcntBitWidth(Version.Major));

1559}

1560

1561staticunsignedencodeStorecnt(constIsaVersion &Version,unsignedWaitcnt,

1562unsigned Storecnt) {

1563return packBits(Storecnt,Waitcnt, getLoadcntStorecntBitShift(Version.Major),

1564 getStorecntBitWidth(Version.Major));

1565}

1566

1567staticunsignedencodeDscnt(constIsaVersion &Version,unsignedWaitcnt,

1568unsigned Dscnt) {

1569return packBits(Dscnt,Waitcnt, getDscntBitShift(Version.Major),

1570 getDscntBitWidth(Version.Major));

1571}

1572

1573staticunsignedencodeLoadcntDscnt(constIsaVersion &Version,unsigned Loadcnt,

1574unsigned Dscnt) {

1575unsignedWaitcnt =getCombinedCountBitMask(Version,false);

1576Waitcnt =encodeLoadcnt(Version,Waitcnt, Loadcnt);

1577Waitcnt =encodeDscnt(Version,Waitcnt, Dscnt);

1578returnWaitcnt;

1579}

1580

1581unsignedencodeLoadcntDscnt(constIsaVersion &Version,constWaitcnt &Decoded) {

1582returnencodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);

1583}

1584

1585staticunsignedencodeStorecntDscnt(constIsaVersion &Version,

1586unsigned Storecnt,unsigned Dscnt) {

1587unsignedWaitcnt =getCombinedCountBitMask(Version,true);

1588Waitcnt =encodeStorecnt(Version,Waitcnt, Storecnt);

1589Waitcnt =encodeDscnt(Version,Waitcnt, Dscnt);

1590returnWaitcnt;

1591}

1592

1593unsignedencodeStorecntDscnt(constIsaVersion &Version,

1594constWaitcnt &Decoded) {

1595returnencodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);

1596}

1597

1598//===----------------------------------------------------------------------===//

1599// Custom Operand Values

1600//===----------------------------------------------------------------------===//

1601

1602staticunsignedgetDefaultCustomOperandEncoding(constCustomOperandVal *Opr,

1603intSize,

1604constMCSubtargetInfo &STI) {

1605unsigned Enc = 0;

1606for (intIdx = 0;Idx <Size; ++Idx) {

1607constauto &Op = Opr[Idx];

1608if (Op.isSupported(STI))

1609 Enc |=Op.encode(Op.Default);

1610 }

1611return Enc;

1612}

1613

1614staticboolisSymbolicCustomOperandEncoding(constCustomOperandVal *Opr,

1615intSize,unsigned Code,

1616bool &HasNonDefaultVal,

1617constMCSubtargetInfo &STI) {

1618unsigned UsedOprMask = 0;

1619 HasNonDefaultVal =false;

1620for (intIdx = 0;Idx <Size; ++Idx) {

1621constauto &Op = Opr[Idx];

1622if (!Op.isSupported(STI))

1623continue;

1624 UsedOprMask |=Op.getMask();

1625unsigned Val =Op.decode(Code);

1626if (!Op.isValid(Val))

1627returnfalse;

1628 HasNonDefaultVal |= (Val !=Op.Default);

1629 }

1630return (Code & ~UsedOprMask) == 0;

1631}

1632

1633staticbooldecodeCustomOperand(constCustomOperandVal *Opr,intSize,

1634unsigned Code,int &Idx,StringRef &Name,

1635unsigned &Val,bool &IsDefault,

1636constMCSubtargetInfo &STI) {

1637while (Idx <Size) {

1638constauto &Op = Opr[Idx++];

1639if (Op.isSupported(STI)) {

1640Name =Op.Name;

1641 Val =Op.decode(Code);

1642 IsDefault = (Val ==Op.Default);

1643returntrue;

1644 }

1645 }

1646

1647returnfalse;

1648}

1649

1650staticintencodeCustomOperandVal(constCustomOperandVal &Op,

1651 int64_t InputVal) {

1652if (InputVal < 0 || InputVal >Op.Max)

1653returnOPR_VAL_INVALID;

1654returnOp.encode(InputVal);

1655}

1656

1657staticintencodeCustomOperand(constCustomOperandVal *Opr,intSize,

1658constStringRef Name, int64_t InputVal,

1659unsigned &UsedOprMask,

1660constMCSubtargetInfo &STI) {

1661int InvalidId =OPR_ID_UNKNOWN;

1662for (intIdx = 0;Idx <Size; ++Idx) {

1663constauto &Op = Opr[Idx];

1664if (Op.Name ==Name) {

1665if (!Op.isSupported(STI)) {

1666 InvalidId =OPR_ID_UNSUPPORTED;

1667continue;

1668 }

1669auto OprMask =Op.getMask();

1670if (OprMask & UsedOprMask)

1671returnOPR_ID_DUPLICATE;

1672 UsedOprMask |= OprMask;

1673returnencodeCustomOperandVal(Op, InputVal);

1674 }

1675 }

1676return InvalidId;

1677}

1678

1679//===----------------------------------------------------------------------===//

1680// DepCtr

1681//===----------------------------------------------------------------------===//

1682

1683namespaceDepCtr {

1684

1685intgetDefaultDepCtrEncoding(constMCSubtargetInfo &STI) {

1686staticintDefault = -1;

1687if (Default == -1)

1688Default =getDefaultCustomOperandEncoding(DepCtrInfo,DEP_CTR_SIZE, STI);

1689returnDefault;

1690}

1691

1692boolisSymbolicDepCtrEncoding(unsigned Code,bool &HasNonDefaultVal,

1693constMCSubtargetInfo &STI) {

1694returnisSymbolicCustomOperandEncoding(DepCtrInfo,DEP_CTR_SIZE, Code,

1695 HasNonDefaultVal, STI);

1696}

1697

1698booldecodeDepCtr(unsigned Code,int &Id,StringRef &Name,unsigned &Val,

1699bool &IsDefault,constMCSubtargetInfo &STI) {

1700returndecodeCustomOperand(DepCtrInfo,DEP_CTR_SIZE, Code, Id,Name, Val,

1701 IsDefault, STI);

1702}

1703

1704intencodeDepCtr(constStringRef Name, int64_t Val,unsigned &UsedOprMask,

1705constMCSubtargetInfo &STI) {

1706returnencodeCustomOperand(DepCtrInfo,DEP_CTR_SIZE,Name, Val, UsedOprMask,

1707 STI);

1708}

1709

1710unsigneddecodeFieldVmVsrc(unsigned Encoded) {

1711return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());

1712}

1713

1714unsigneddecodeFieldVaVdst(unsigned Encoded) {

1715return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());

1716}

1717

1718unsigneddecodeFieldSaSdst(unsigned Encoded) {

1719return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());

1720}

1721

1722unsignedencodeFieldVmVsrc(unsigned Encoded,unsigned VmVsrc) {

1723return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());

1724}

1725

1726unsignedencodeFieldVmVsrc(unsigned VmVsrc) {

1727returnencodeFieldVmVsrc(0xffff, VmVsrc);

1728}

1729

1730unsignedencodeFieldVaVdst(unsigned Encoded,unsigned VaVdst) {

1731return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());

1732}

1733

1734unsignedencodeFieldVaVdst(unsigned VaVdst) {

1735returnencodeFieldVaVdst(0xffff, VaVdst);

1736}

1737

1738unsignedencodeFieldSaSdst(unsigned Encoded,unsigned SaSdst) {

1739return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());

1740}

1741

1742unsignedencodeFieldSaSdst(unsigned SaSdst) {

1743returnencodeFieldSaSdst(0xffff, SaSdst);

1744}

1745

1746}// namespace DepCtr

1747

1748//===----------------------------------------------------------------------===//

1749// exp tgt

1750//===----------------------------------------------------------------------===//

1751

1752namespaceExp {

1753

1754structExpTgt {

1755StringLiteral Name;

1756unsignedTgt;

1757unsignedMaxIndex;

1758};

1759

1760staticconstexprExpTgt ExpTgtInfo[] = {

1761 {{"null"},ET_NULL,ET_NULL_MAX_IDX},

1762 {{"mrtz"},ET_MRTZ,ET_MRTZ_MAX_IDX},

1763 {{"prim"},ET_PRIM,ET_PRIM_MAX_IDX},

1764 {{"mrt"},ET_MRT0,ET_MRT_MAX_IDX},

1765 {{"pos"},ET_POS0,ET_POS_MAX_IDX},

1766 {{"dual_src_blend"},ET_DUAL_SRC_BLEND0,ET_DUAL_SRC_BLEND_MAX_IDX},

1767 {{"param"},ET_PARAM0,ET_PARAM_MAX_IDX},

1768};

1769

1770boolgetTgtName(unsigned Id,StringRef &Name,int &Index) {

1771for (constExpTgt &Val :ExpTgtInfo) {

1772if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {

1773Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);

1774Name = Val.Name;

1775returntrue;

1776 }

1777 }

1778returnfalse;

1779}

1780

1781unsignedgetTgtId(constStringRef Name) {

1782

1783for (constExpTgt &Val :ExpTgtInfo) {

1784if (Val.MaxIndex == 0 &&Name == Val.Name)

1785return Val.Tgt;

1786

1787if (Val.MaxIndex > 0 &&Name.starts_with(Val.Name)) {

1788StringRef Suffix =Name.drop_front(Val.Name.size());

1789

1790unsigned Id;

1791if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)

1792returnET_INVALID;

1793

1794// Disable leading zeroes

1795if (Suffix.size() > 1 && Suffix[0] =='0')

1796returnET_INVALID;

1797

1798return Val.Tgt + Id;

1799 }

1800 }

1801returnET_INVALID;

1802}

1803

1804boolisSupportedTgtId(unsigned Id,constMCSubtargetInfo &STI) {

1805switch (Id) {

1806caseET_NULL:

1807return !isGFX11Plus(STI);

1808caseET_POS4:

1809caseET_PRIM:

1810returnisGFX10Plus(STI);

1811caseET_DUAL_SRC_BLEND0:

1812caseET_DUAL_SRC_BLEND1:

1813returnisGFX11Plus(STI);

1814default:

1815if (Id >=ET_PARAM0 && Id <=ET_PARAM31)

1816return !isGFX11Plus(STI);

1817returntrue;

1818 }

1819}

1820

1821}// namespace Exp

1822

1823//===----------------------------------------------------------------------===//

1824// MTBUF Format

1825//===----------------------------------------------------------------------===//

1826

1827namespaceMTBUFFormat {

1828

1829int64_tgetDfmt(constStringRef Name) {

1830for (int Id =DFMT_MIN; Id <=DFMT_MAX; ++Id) {

1831if (Name ==DfmtSymbolic[Id])

1832return Id;

1833 }

1834returnDFMT_UNDEF;

1835}

1836

1837StringRef getDfmtName(unsigned Id) {

1838assert(Id <=DFMT_MAX);

1839returnDfmtSymbolic[Id];

1840}

1841

1842staticStringLiteralconst *getNfmtLookupTable(constMCSubtargetInfo &STI) {

1843if (isSI(STI) ||isCI(STI))

1844returnNfmtSymbolicSICI;

1845if (isVI(STI) ||isGFX9(STI))

1846returnNfmtSymbolicVI;

1847returnNfmtSymbolicGFX10;

1848}

1849

1850int64_tgetNfmt(constStringRef Name,constMCSubtargetInfo &STI) {

1851constauto *lookupTable =getNfmtLookupTable(STI);

1852for (int Id =NFMT_MIN; Id <=NFMT_MAX; ++Id) {

1853if (Name == lookupTable[Id])

1854return Id;

1855 }

1856returnNFMT_UNDEF;

1857}

1858

1859StringRef getNfmtName(unsigned Id,constMCSubtargetInfo &STI) {

1860assert(Id <=NFMT_MAX);

1861returngetNfmtLookupTable(STI)[Id];

1862}

1863

1864boolisValidDfmtNfmt(unsigned Id,constMCSubtargetInfo &STI) {

1865unsigned Dfmt;

1866unsigned Nfmt;

1867decodeDfmtNfmt(Id, Dfmt, Nfmt);

1868returnisValidNfmt(Nfmt, STI);

1869}

1870

1871boolisValidNfmt(unsigned Id,constMCSubtargetInfo &STI) {

1872return !getNfmtName(Id, STI).empty();

1873}

1874

1875int64_tencodeDfmtNfmt(unsigned Dfmt,unsigned Nfmt) {

1876return (Dfmt <<DFMT_SHIFT) | (Nfmt <<NFMT_SHIFT);

1877}

1878

1879voiddecodeDfmtNfmt(unsignedFormat,unsigned &Dfmt,unsigned &Nfmt) {

1880 Dfmt = (Format >>DFMT_SHIFT) &DFMT_MASK;

1881 Nfmt = (Format >>NFMT_SHIFT) &NFMT_MASK;

1882}

1883

1884int64_tgetUnifiedFormat(constStringRef Name,constMCSubtargetInfo &STI) {

1885if (isGFX11Plus(STI)) {

1886for (int Id =UfmtGFX11::UFMT_FIRST; Id <=UfmtGFX11::UFMT_LAST; ++Id) {

1887if (Name ==UfmtSymbolicGFX11[Id])

1888return Id;

1889 }

1890 }else {

1891for (int Id =UfmtGFX10::UFMT_FIRST; Id <=UfmtGFX10::UFMT_LAST; ++Id) {

1892if (Name ==UfmtSymbolicGFX10[Id])

1893return Id;

1894 }

1895 }

1896returnUFMT_UNDEF;

1897}

1898

1899StringRef getUnifiedFormatName(unsigned Id,constMCSubtargetInfo &STI) {

1900if(isValidUnifiedFormat(Id, STI))

1901returnisGFX10(STI) ?UfmtSymbolicGFX10[Id] :UfmtSymbolicGFX11[Id];

1902return"";

1903}

1904

1905boolisValidUnifiedFormat(unsigned Id,constMCSubtargetInfo &STI) {

1906returnisGFX10(STI) ? Id <=UfmtGFX10::UFMT_LAST : Id <=UfmtGFX11::UFMT_LAST;

1907}

1908

1909int64_tconvertDfmtNfmt2Ufmt(unsigned Dfmt,unsigned Nfmt,

1910constMCSubtargetInfo &STI) {

1911 int64_t Fmt =encodeDfmtNfmt(Dfmt, Nfmt);

1912if (isGFX11Plus(STI)) {

1913for (int Id =UfmtGFX11::UFMT_FIRST; Id <=UfmtGFX11::UFMT_LAST; ++Id) {

1914if (Fmt ==DfmtNfmt2UFmtGFX11[Id])

1915return Id;

1916 }

1917 }else {

1918for (int Id =UfmtGFX10::UFMT_FIRST; Id <=UfmtGFX10::UFMT_LAST; ++Id) {

1919if (Fmt ==DfmtNfmt2UFmtGFX10[Id])

1920return Id;

1921 }

1922 }

1923returnUFMT_UNDEF;

1924}

1925

1926boolisValidFormatEncoding(unsigned Val,constMCSubtargetInfo &STI) {

1927returnisGFX10Plus(STI) ? (Val <=UFMT_MAX) : (Val <=DFMT_NFMT_MAX);

1928}

1929

1930unsignedgetDefaultFormatEncoding(constMCSubtargetInfo &STI) {

1931if (isGFX10Plus(STI))

1932returnUFMT_DEFAULT;

1933returnDFMT_NFMT_DEFAULT;

1934}

1935

1936}// namespace MTBUFFormat

1937

1938//===----------------------------------------------------------------------===//

1939// SendMsg

1940//===----------------------------------------------------------------------===//

1941

1942namespaceSendMsg {

1943

1944staticuint64_t getMsgIdMask(constMCSubtargetInfo &STI) {

1945returnisGFX11Plus(STI) ?ID_MASK_GFX11Plus_ :ID_MASK_PreGFX11_;

1946}

1947

1948boolisValidMsgId(int64_t MsgId,constMCSubtargetInfo &STI) {

1949return (MsgId & ~(getMsgIdMask(STI))) == 0;

1950}

1951

1952boolisValidMsgOp(int64_t MsgId, int64_t OpId,constMCSubtargetInfo &STI,

1953bool Strict) {

1954assert(isValidMsgId(MsgId, STI));

1955

1956if (!Strict)

1957return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

1958

1959if (msgRequiresOp(MsgId, STI)) {

1960if (MsgId ==ID_GS_PreGFX11 && OpId ==OP_GS_NOP)

1961returnfalse;

1962

1963return !getMsgOpName(MsgId, OpId, STI).empty();

1964 }

1965

1966return OpId ==OP_NONE_;

1967}

1968

1969boolisValidMsgStream(int64_t MsgId, int64_t OpId, int64_tStreamId,

1970constMCSubtargetInfo &STI,bool Strict) {

1971assert(isValidMsgOp(MsgId, OpId, STI, Strict));

1972

1973if (!Strict)

1974return 0 <=StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

1975

1976if (!isGFX11Plus(STI)) {

1977switch (MsgId) {

1978caseID_GS_PreGFX11:

1979returnSTREAM_ID_FIRST_ <=StreamId &&StreamId <STREAM_ID_LAST_;

1980caseID_GS_DONE_PreGFX11:

1981return (OpId ==OP_GS_NOP) ?

1982 (StreamId ==STREAM_ID_NONE_) :

1983 (STREAM_ID_FIRST_ <=StreamId &&StreamId <STREAM_ID_LAST_);

1984 }

1985 }

1986returnStreamId ==STREAM_ID_NONE_;

1987}

1988

1989boolmsgRequiresOp(int64_t MsgId,constMCSubtargetInfo &STI) {

1990return MsgId ==ID_SYSMSG ||

1991 (!isGFX11Plus(STI) &&

1992 (MsgId ==ID_GS_PreGFX11 || MsgId ==ID_GS_DONE_PreGFX11));

1993}

1994

1995boolmsgSupportsStream(int64_t MsgId, int64_t OpId,

1996constMCSubtargetInfo &STI) {

1997return !isGFX11Plus(STI) &&

1998 (MsgId ==ID_GS_PreGFX11 || MsgId ==ID_GS_DONE_PreGFX11) &&

1999 OpId !=OP_GS_NOP;

2000}

2001

2002voiddecodeMsg(unsigned Val,uint16_t &MsgId,uint16_t &OpId,

2003uint16_t &StreamId,constMCSubtargetInfo &STI) {

2004 MsgId = Val &getMsgIdMask(STI);

2005if (isGFX11Plus(STI)) {

2006 OpId = 0;

2007StreamId = 0;

2008 }else {

2009 OpId = (Val &OP_MASK_) >>OP_SHIFT_;

2010StreamId = (Val &STREAM_ID_MASK_) >>STREAM_ID_SHIFT_;

2011 }

2012}

2013

2014uint64_t encodeMsg(uint64_t MsgId,

2015uint64_t OpId,

2016uint64_t StreamId) {

2017return MsgId | (OpId <<OP_SHIFT_) | (StreamId <<STREAM_ID_SHIFT_);

2018}

2019

2020}// namespace SendMsg

2021

2022//===----------------------------------------------------------------------===//

2023//

2024//===----------------------------------------------------------------------===//

2025

2026unsignedgetInitialPSInputAddr(constFunction &F) {

2027returnF.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);

2028}

2029

2030boolgetHasColorExport(constFunction &F) {

2031// As a safe default always respond as if PS has color exports.

2032returnF.getFnAttributeAsParsedInteger(

2033"amdgpu-color-export",

2034F.getCallingConv() ==CallingConv::AMDGPU_PS ? 1 : 0) != 0;

2035}

2036

2037boolgetHasDepthExport(constFunction &F) {

2038returnF.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;

2039}

2040

2041boolisShader(CallingConv::ID cc) {

2042switch(cc) {

2043caseCallingConv::AMDGPU_VS:

2044caseCallingConv::AMDGPU_LS:

2045caseCallingConv::AMDGPU_HS:

2046caseCallingConv::AMDGPU_ES:

2047caseCallingConv::AMDGPU_GS:

2048caseCallingConv::AMDGPU_PS:

2049caseCallingConv::AMDGPU_CS_Chain:

2050caseCallingConv::AMDGPU_CS_ChainPreserve:

2051caseCallingConv::AMDGPU_CS:

2052returntrue;

2053default:

2054returnfalse;

2055 }

2056}

2057

2058boolisGraphics(CallingConv::ID cc) {

2059returnisShader(cc) || cc ==CallingConv::AMDGPU_Gfx;

2060}

2061

2062boolisCompute(CallingConv::ID cc) {

2063return !isGraphics(cc) || cc ==CallingConv::AMDGPU_CS;

2064}

2065

2066boolisEntryFunctionCC(CallingConv::ID CC) {

2067switch (CC) {

2068caseCallingConv::AMDGPU_KERNEL:

2069caseCallingConv::SPIR_KERNEL:

2070caseCallingConv::AMDGPU_VS:

2071caseCallingConv::AMDGPU_GS:

2072caseCallingConv::AMDGPU_PS:

2073caseCallingConv::AMDGPU_CS:

2074caseCallingConv::AMDGPU_ES:

2075caseCallingConv::AMDGPU_HS:

2076caseCallingConv::AMDGPU_LS:

2077returntrue;

2078default:

2079returnfalse;

2080 }

2081}

2082

2083boolisModuleEntryFunctionCC(CallingConv::ID CC) {

2084switch (CC) {

2085caseCallingConv::AMDGPU_Gfx:

2086returntrue;

2087default:

2088returnisEntryFunctionCC(CC) ||isChainCC(CC);

2089 }

2090}

2091

2092boolisChainCC(CallingConv::ID CC) {

2093switch (CC) {

2094caseCallingConv::AMDGPU_CS_Chain:

2095caseCallingConv::AMDGPU_CS_ChainPreserve:

2096returntrue;

2097default:

2098returnfalse;

2099 }

2100}

2101

2102boolisKernelCC(constFunction *Func) {

2103returnAMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());

2104}

2105

2106boolhasXNACK(constMCSubtargetInfo &STI) {

2107return STI.hasFeature(AMDGPU::FeatureXNACK);

2108}

2109

2110boolhasSRAMECC(constMCSubtargetInfo &STI) {

2111return STI.hasFeature(AMDGPU::FeatureSRAMECC);

2112}

2113

2114boolhasMIMG_R128(constMCSubtargetInfo &STI) {

2115return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);

2116}

2117

2118boolhasA16(constMCSubtargetInfo &STI) {

2119return STI.hasFeature(AMDGPU::FeatureA16);

2120}

2121

2122boolhasG16(constMCSubtargetInfo &STI) {

2123return STI.hasFeature(AMDGPU::FeatureG16);

2124}

2125

2126boolhasPackedD16(constMCSubtargetInfo &STI) {

2127return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&

2128 !isSI(STI);

2129}

2130

2131boolhasGDS(constMCSubtargetInfo &STI) {

2132return STI.hasFeature(AMDGPU::FeatureGDS);

2133}

2134

2135unsignedgetNSAMaxSize(constMCSubtargetInfo &STI,bool HasSampler) {

2136autoVersion =getIsaVersion(STI.getCPU());

2137if (Version.Major == 10)

2138returnVersion.Minor >= 3 ? 13 : 5;

2139if (Version.Major == 11)

2140return 5;

2141if (Version.Major >= 12)

2142return HasSampler ? 4 : 5;

2143return 0;

2144}

2145

2146unsignedgetMaxNumUserSGPRs(constMCSubtargetInfo &STI) {return 16; }

2147

2148boolisSI(constMCSubtargetInfo &STI) {

2149return STI.hasFeature(AMDGPU::FeatureSouthernIslands);

2150}

2151

2152boolisCI(constMCSubtargetInfo &STI) {

2153return STI.hasFeature(AMDGPU::FeatureSeaIslands);

2154}

2155

2156boolisVI(constMCSubtargetInfo &STI) {

2157return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);

2158}

2159

2160boolisGFX9(constMCSubtargetInfo &STI) {

2161return STI.hasFeature(AMDGPU::FeatureGFX9);

2162}

2163

2164boolisGFX9_GFX10(constMCSubtargetInfo &STI) {

2165returnisGFX9(STI) ||isGFX10(STI);

2166}

2167

2168boolisGFX9_GFX10_GFX11(constMCSubtargetInfo &STI) {

2169returnisGFX9(STI) ||isGFX10(STI) ||isGFX11(STI);

2170}

2171

2172boolisGFX8_GFX9_GFX10(constMCSubtargetInfo &STI) {

2173returnisVI(STI) ||isGFX9(STI) ||isGFX10(STI);

2174}

2175

2176boolisGFX8Plus(constMCSubtargetInfo &STI) {

2177returnisVI(STI) ||isGFX9Plus(STI);

2178}

2179

2180boolisGFX9Plus(constMCSubtargetInfo &STI) {

2181returnisGFX9(STI) ||isGFX10Plus(STI);

2182}

2183

2184boolisNotGFX9Plus(constMCSubtargetInfo &STI) {return !isGFX9Plus(STI); }

2185

2186boolisGFX10(constMCSubtargetInfo &STI) {

2187return STI.hasFeature(AMDGPU::FeatureGFX10);

2188}

2189

2190boolisGFX10_GFX11(constMCSubtargetInfo &STI) {

2191returnisGFX10(STI) ||isGFX11(STI);

2192}

2193

2194boolisGFX10Plus(constMCSubtargetInfo &STI) {

2195returnisGFX10(STI) ||isGFX11Plus(STI);

2196}

2197

2198boolisGFX11(constMCSubtargetInfo &STI) {

2199return STI.hasFeature(AMDGPU::FeatureGFX11);

2200}

2201

2202boolisGFX11Plus(constMCSubtargetInfo &STI) {

2203returnisGFX11(STI) ||isGFX12Plus(STI);

2204}

2205

2206boolisGFX12(constMCSubtargetInfo &STI) {

2207return STI.getFeatureBits()[AMDGPU::FeatureGFX12];

2208}

2209

2210boolisGFX12Plus(constMCSubtargetInfo &STI) {returnisGFX12(STI); }

2211

2212boolisNotGFX12Plus(constMCSubtargetInfo &STI) {return !isGFX12Plus(STI); }

2213

2214boolisNotGFX11Plus(constMCSubtargetInfo &STI) {

2215return !isGFX11Plus(STI);

2216}

2217

2218boolisNotGFX10Plus(constMCSubtargetInfo &STI) {

2219returnisSI(STI) ||isCI(STI) ||isVI(STI) ||isGFX9(STI);

2220}

2221

2222boolisGFX10Before1030(constMCSubtargetInfo &STI) {

2223returnisGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);

2224}

2225

2226boolisGCN3Encoding(constMCSubtargetInfo &STI) {

2227return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);

2228}

2229

2230boolisGFX10_AEncoding(constMCSubtargetInfo &STI) {

2231return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);

2232}

2233

2234boolisGFX10_BEncoding(constMCSubtargetInfo &STI) {

2235return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);

2236}

2237

2238boolhasGFX10_3Insts(constMCSubtargetInfo &STI) {

2239return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);

2240}

2241

2242boolisGFX10_3_GFX11(constMCSubtargetInfo &STI) {

2243returnisGFX10_BEncoding(STI) && !isGFX12Plus(STI);

2244}

2245

2246boolisGFX90A(constMCSubtargetInfo &STI) {

2247return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);

2248}

2249

2250boolisGFX940(constMCSubtargetInfo &STI) {

2251return STI.hasFeature(AMDGPU::FeatureGFX940Insts);

2252}

2253

2254boolhasArchitectedFlatScratch(constMCSubtargetInfo &STI) {

2255return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);

2256}

2257

2258boolhasMAIInsts(constMCSubtargetInfo &STI) {

2259return STI.hasFeature(AMDGPU::FeatureMAIInsts);

2260}

2261

2262boolhasVOPD(constMCSubtargetInfo &STI) {

2263return STI.hasFeature(AMDGPU::FeatureVOPD);

2264}

2265

2266boolhasDPPSrc1SGPR(constMCSubtargetInfo &STI) {

2267return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);

2268}

2269

2270unsignedhasKernargPreload(constMCSubtargetInfo &STI) {

2271return STI.hasFeature(AMDGPU::FeatureKernargPreload);

2272}

2273

2274int32_tgetTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,

2275 int32_t ArgNumVGPR) {

2276if (has90AInsts && ArgNumAGPR)

2277returnalignTo(ArgNumVGPR, 4) + ArgNumAGPR;

2278return std::max(ArgNumVGPR, ArgNumAGPR);

2279}

2280

2281boolisSGPR(MCRegister Reg,constMCRegisterInfo *TRI) {

2282constMCRegisterClass SGPRClass =TRI->getRegClass(AMDGPU::SReg_32RegClassID);

2283constMCRegister FirstSubReg =TRI->getSubReg(Reg, AMDGPU::sub0);

2284return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg :Reg) ||

2285Reg == AMDGPU::SCC;

2286}

2287

2288boolisHi16Reg(MCRegister Reg,constMCRegisterInfo &MRI) {

2289returnMRI.getEncodingValue(Reg) &AMDGPU::HWEncoding::IS_HI16;

2290}

2291

2292#define MAP_REG2REG \

2293 using namespace AMDGPU; \

2294 switch(Reg.id()) { \

2295 default: return Reg; \

2296 CASE_CI_VI(FLAT_SCR) \

2297 CASE_CI_VI(FLAT_SCR_LO) \

2298 CASE_CI_VI(FLAT_SCR_HI) \

2299 CASE_VI_GFX9PLUS(TTMP0) \

2300 CASE_VI_GFX9PLUS(TTMP1) \

2301 CASE_VI_GFX9PLUS(TTMP2) \

2302 CASE_VI_GFX9PLUS(TTMP3) \

2303 CASE_VI_GFX9PLUS(TTMP4) \

2304 CASE_VI_GFX9PLUS(TTMP5) \

2305 CASE_VI_GFX9PLUS(TTMP6) \

2306 CASE_VI_GFX9PLUS(TTMP7) \

2307 CASE_VI_GFX9PLUS(TTMP8) \

2308 CASE_VI_GFX9PLUS(TTMP9) \

2309 CASE_VI_GFX9PLUS(TTMP10) \

2310 CASE_VI_GFX9PLUS(TTMP11) \

2311 CASE_VI_GFX9PLUS(TTMP12) \

2312 CASE_VI_GFX9PLUS(TTMP13) \

2313 CASE_VI_GFX9PLUS(TTMP14) \

2314 CASE_VI_GFX9PLUS(TTMP15) \

2315 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \

2316 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \

2317 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \

2318 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \

2319 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \

2320 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \

2321 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \

2322 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \

2323 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \

2324 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \

2325 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \

2326 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \

2327 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \

2328 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \

2329 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \

2330 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \

2331 CASE_GFXPRE11_GFX11PLUS(M0) \

2332 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \

2333 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \

2334 }

2335

2336#define CASE_CI_VI(node) \

2337 assert(!isSI(STI)); \

2338 case node: return isCI(STI) ? node##_ci : node##_vi;

2339

2340#define CASE_VI_GFX9PLUS(node) \

2341 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

2342

2343#define CASE_GFXPRE11_GFX11PLUS(node) \

2344 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

2345

2346#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \

2347 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

2348

2349MCRegister getMCReg(MCRegister Reg,constMCSubtargetInfo &STI) {

2350if (STI.getTargetTriple().getArch() ==Triple::r600)

2351returnReg;

2352MAP_REG2REG

2353}

2354

2355#undef CASE_CI_VI

2356#undef CASE_VI_GFX9PLUS

2357#undef CASE_GFXPRE11_GFX11PLUS

2358#undef CASE_GFXPRE11_GFX11PLUS_TO

2359

2360#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;

2361#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;

2362#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;

2363#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

2364

2365MCRegister mc2PseudoReg(MCRegister Reg) {MAP_REG2REG }

2366

2367boolisInlineValue(unsignedReg) {

2368switch (Reg) {

2369case AMDGPU::SRC_SHARED_BASE_LO:

2370case AMDGPU::SRC_SHARED_BASE:

2371case AMDGPU::SRC_SHARED_LIMIT_LO:

2372case AMDGPU::SRC_SHARED_LIMIT:

2373case AMDGPU::SRC_PRIVATE_BASE_LO:

2374case AMDGPU::SRC_PRIVATE_BASE:

2375case AMDGPU::SRC_PRIVATE_LIMIT_LO:

2376case AMDGPU::SRC_PRIVATE_LIMIT:

2377case AMDGPU::SRC_POPS_EXITING_WAVE_ID:

2378returntrue;

2379case AMDGPU::SRC_VCCZ:

2380case AMDGPU::SRC_EXECZ:

2381case AMDGPU::SRC_SCC:

2382returntrue;

2383case AMDGPU::SGPR_NULL:

2384returntrue;

2385default:

2386returnfalse;

2387 }

2388}

2389

2390#undef CASE_CI_VI

2391#undef CASE_VI_GFX9PLUS

2392#undef CASE_GFXPRE11_GFX11PLUS

2393#undef CASE_GFXPRE11_GFX11PLUS_TO

2394#undef MAP_REG2REG

2395

2396boolisSISrcOperand(constMCInstrDesc &Desc,unsigned OpNo) {

2397assert(OpNo <Desc.NumOperands);

2398unsigned OpType =Desc.operands()[OpNo].OperandType;

2399return OpType >=AMDGPU::OPERAND_SRC_FIRST &&

2400 OpType <=AMDGPU::OPERAND_SRC_LAST;

2401}

2402

2403boolisKImmOperand(constMCInstrDesc &Desc,unsigned OpNo) {

2404assert(OpNo <Desc.NumOperands);

2405unsigned OpType =Desc.operands()[OpNo].OperandType;

2406return OpType >=AMDGPU::OPERAND_KIMM_FIRST &&

2407 OpType <=AMDGPU::OPERAND_KIMM_LAST;

2408}

2409

2410boolisSISrcFPOperand(constMCInstrDesc &Desc,unsigned OpNo) {

2411assert(OpNo <Desc.NumOperands);

2412unsigned OpType =Desc.operands()[OpNo].OperandType;

2413switch (OpType) {

2414caseAMDGPU::OPERAND_REG_IMM_FP32:

2415caseAMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:

2416caseAMDGPU::OPERAND_REG_IMM_FP64:

2417caseAMDGPU::OPERAND_REG_IMM_FP16:

2418caseAMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:

2419caseAMDGPU::OPERAND_REG_IMM_V2FP16:

2420caseAMDGPU::OPERAND_REG_INLINE_C_FP32:

2421caseAMDGPU::OPERAND_REG_INLINE_C_FP64:

2422caseAMDGPU::OPERAND_REG_INLINE_C_FP16:

2423caseAMDGPU::OPERAND_REG_INLINE_C_V2FP16:

2424caseAMDGPU::OPERAND_REG_INLINE_AC_FP32:

2425caseAMDGPU::OPERAND_REG_INLINE_AC_FP16:

2426caseAMDGPU::OPERAND_REG_INLINE_AC_V2FP16:

2427caseAMDGPU::OPERAND_REG_IMM_V2FP32:

2428caseAMDGPU::OPERAND_REG_INLINE_C_V2FP32:

2429caseAMDGPU::OPERAND_REG_INLINE_AC_FP64:

2430returntrue;

2431default:

2432returnfalse;

2433 }

2434}

2435

2436boolisSISrcInlinableOperand(constMCInstrDesc &Desc,unsigned OpNo) {

2437assert(OpNo <Desc.NumOperands);

2438unsigned OpType =Desc.operands()[OpNo].OperandType;

2439return (OpType >=AMDGPU::OPERAND_REG_INLINE_C_FIRST &&

2440 OpType <=AMDGPU::OPERAND_REG_INLINE_C_LAST) ||

2441 (OpType >=AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&

2442 OpType <=AMDGPU::OPERAND_REG_INLINE_AC_LAST);

2443}

2444

2445// Avoid using MCRegisterClass::getSize, since that function will go away

2446// (move from MC* level to Target* level). Return size in bits.

2447unsignedgetRegBitWidth(unsigned RCID) {

2448switch (RCID) {

2449case AMDGPU::SGPR_LO16RegClassID:

2450case AMDGPU::AGPR_LO16RegClassID:

2451return 16;

2452case AMDGPU::SGPR_32RegClassID:

2453case AMDGPU::VGPR_32RegClassID:

2454case AMDGPU::VRegOrLds_32RegClassID:

2455case AMDGPU::AGPR_32RegClassID:

2456case AMDGPU::VS_32RegClassID:

2457case AMDGPU::AV_32RegClassID:

2458case AMDGPU::SReg_32RegClassID:

2459case AMDGPU::SReg_32_XM0RegClassID:

2460case AMDGPU::SRegOrLds_32RegClassID:

2461return 32;

2462case AMDGPU::SGPR_64RegClassID:

2463case AMDGPU::VS_64RegClassID:

2464case AMDGPU::SReg_64RegClassID:

2465case AMDGPU::VReg_64RegClassID:

2466case AMDGPU::AReg_64RegClassID:

2467case AMDGPU::SReg_64_XEXECRegClassID:

2468case AMDGPU::VReg_64_Align2RegClassID:

2469case AMDGPU::AReg_64_Align2RegClassID:

2470case AMDGPU::AV_64RegClassID:

2471case AMDGPU::AV_64_Align2RegClassID:

2472return 64;

2473case AMDGPU::SGPR_96RegClassID:

2474case AMDGPU::SReg_96RegClassID:

2475case AMDGPU::VReg_96RegClassID:

2476case AMDGPU::AReg_96RegClassID:

2477case AMDGPU::VReg_96_Align2RegClassID:

2478case AMDGPU::AReg_96_Align2RegClassID:

2479case AMDGPU::AV_96RegClassID:

2480case AMDGPU::AV_96_Align2RegClassID:

2481return 96;

2482case AMDGPU::SGPR_128RegClassID:

2483case AMDGPU::SReg_128RegClassID:

2484case AMDGPU::VReg_128RegClassID:

2485case AMDGPU::AReg_128RegClassID:

2486case AMDGPU::VReg_128_Align2RegClassID:

2487case AMDGPU::AReg_128_Align2RegClassID:

2488case AMDGPU::AV_128RegClassID:

2489case AMDGPU::AV_128_Align2RegClassID:

2490case AMDGPU::SReg_128_XNULLRegClassID:

2491return 128;

2492case AMDGPU::SGPR_160RegClassID:

2493case AMDGPU::SReg_160RegClassID:

2494case AMDGPU::VReg_160RegClassID:

2495case AMDGPU::AReg_160RegClassID:

2496case AMDGPU::VReg_160_Align2RegClassID:

2497case AMDGPU::AReg_160_Align2RegClassID:

2498case AMDGPU::AV_160RegClassID:

2499case AMDGPU::AV_160_Align2RegClassID:

2500return 160;

2501case AMDGPU::SGPR_192RegClassID:

2502case AMDGPU::SReg_192RegClassID:

2503case AMDGPU::VReg_192RegClassID:

2504case AMDGPU::AReg_192RegClassID:

2505case AMDGPU::VReg_192_Align2RegClassID:

2506case AMDGPU::AReg_192_Align2RegClassID:

2507case AMDGPU::AV_192RegClassID:

2508case AMDGPU::AV_192_Align2RegClassID:

2509return 192;

2510case AMDGPU::SGPR_224RegClassID:

2511case AMDGPU::SReg_224RegClassID:

2512case AMDGPU::VReg_224RegClassID:

2513case AMDGPU::AReg_224RegClassID:

2514case AMDGPU::VReg_224_Align2RegClassID:

2515case AMDGPU::AReg_224_Align2RegClassID:

2516case AMDGPU::AV_224RegClassID:

2517case AMDGPU::AV_224_Align2RegClassID:

2518return 224;

2519case AMDGPU::SGPR_256RegClassID:

2520case AMDGPU::SReg_256RegClassID:

2521case AMDGPU::VReg_256RegClassID:

2522case AMDGPU::AReg_256RegClassID:

2523case AMDGPU::VReg_256_Align2RegClassID:

2524case AMDGPU::AReg_256_Align2RegClassID:

2525case AMDGPU::AV_256RegClassID:

2526case AMDGPU::AV_256_Align2RegClassID:

2527case AMDGPU::SReg_256_XNULLRegClassID:

2528return 256;

2529case AMDGPU::SGPR_288RegClassID:

2530case AMDGPU::SReg_288RegClassID:

2531case AMDGPU::VReg_288RegClassID:

2532case AMDGPU::AReg_288RegClassID:

2533case AMDGPU::VReg_288_Align2RegClassID:

2534case AMDGPU::AReg_288_Align2RegClassID:

2535case AMDGPU::AV_288RegClassID:

2536case AMDGPU::AV_288_Align2RegClassID:

2537return 288;

2538case AMDGPU::SGPR_320RegClassID:

2539case AMDGPU::SReg_320RegClassID:

2540case AMDGPU::VReg_320RegClassID:

2541case AMDGPU::AReg_320RegClassID:

2542case AMDGPU::VReg_320_Align2RegClassID:

2543case AMDGPU::AReg_320_Align2RegClassID:

2544case AMDGPU::AV_320RegClassID:

2545case AMDGPU::AV_320_Align2RegClassID:

2546return 320;

2547case AMDGPU::SGPR_352RegClassID:

2548case AMDGPU::SReg_352RegClassID:

2549case AMDGPU::VReg_352RegClassID:

2550case AMDGPU::AReg_352RegClassID:

2551case AMDGPU::VReg_352_Align2RegClassID:

2552case AMDGPU::AReg_352_Align2RegClassID:

2553case AMDGPU::AV_352RegClassID:

2554case AMDGPU::AV_352_Align2RegClassID:

2555return 352;

2556case AMDGPU::SGPR_384RegClassID:

2557case AMDGPU::SReg_384RegClassID:

2558case AMDGPU::VReg_384RegClassID:

2559case AMDGPU::AReg_384RegClassID:

2560case AMDGPU::VReg_384_Align2RegClassID:

2561case AMDGPU::AReg_384_Align2RegClassID:

2562case AMDGPU::AV_384RegClassID:

2563case AMDGPU::AV_384_Align2RegClassID:

2564return 384;

2565case AMDGPU::SGPR_512RegClassID:

2566case AMDGPU::SReg_512RegClassID:

2567case AMDGPU::VReg_512RegClassID:

2568case AMDGPU::AReg_512RegClassID:

2569case AMDGPU::VReg_512_Align2RegClassID:

2570case AMDGPU::AReg_512_Align2RegClassID:

2571case AMDGPU::AV_512RegClassID:

2572case AMDGPU::AV_512_Align2RegClassID:

2573return 512;

2574case AMDGPU::SGPR_1024RegClassID:

2575case AMDGPU::SReg_1024RegClassID:

2576case AMDGPU::VReg_1024RegClassID:

2577case AMDGPU::AReg_1024RegClassID:

2578case AMDGPU::VReg_1024_Align2RegClassID:

2579case AMDGPU::AReg_1024_Align2RegClassID:

2580case AMDGPU::AV_1024RegClassID:

2581case AMDGPU::AV_1024_Align2RegClassID:

2582return 1024;

2583default:

2584llvm_unreachable("Unexpected register class");

2585 }

2586}

2587

2588unsignedgetRegBitWidth(constMCRegisterClass &RC) {

2589returngetRegBitWidth(RC.getID());

2590}

2591

2592unsignedgetRegOperandSize(constMCRegisterInfo *MRI,constMCInstrDesc &Desc,

2593unsigned OpNo) {

2594assert(OpNo <Desc.NumOperands);

2595unsigned RCID =Desc.operands()[OpNo].RegClass;

2596returngetRegBitWidth(RCID) / 8;

2597}

2598

2599boolisInlinableLiteral64(int64_tLiteral,bool HasInv2Pi) {

2600if (isInlinableIntLiteral(Literal))

2601returntrue;

2602

2603uint64_t Val =static_cast<uint64_t>(Literal);

2604return (Val == llvm::bit_cast<uint64_t>(0.0)) ||

2605 (Val == llvm::bit_cast<uint64_t>(1.0)) ||

2606 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||

2607 (Val == llvm::bit_cast<uint64_t>(0.5)) ||

2608 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||

2609 (Val == llvm::bit_cast<uint64_t>(2.0)) ||

2610 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||

2611 (Val == llvm::bit_cast<uint64_t>(4.0)) ||

2612 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||

2613 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);

2614}

2615

2616boolisInlinableLiteral32(int32_tLiteral,bool HasInv2Pi) {

2617if (isInlinableIntLiteral(Literal))

2618returntrue;

2619

2620// The actual type of the operand does not seem to matter as long

2621// as the bits match one of the inline immediate values. For example:

2622//

2623// -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,

2624// so it is a legal inline immediate.

2625//

2626// 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in

2627// floating-point, so it is a legal inline immediate.

2628

2629uint32_t Val =static_cast<uint32_t>(Literal);

2630return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||

2631 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||

2632 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||

2633 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||

2634 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||

2635 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||

2636 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||

2637 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||

2638 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||

2639 (Val == 0x3e22f983 && HasInv2Pi);

2640}

2641

2642boolisInlinableLiteralBF16(int16_tLiteral,bool HasInv2Pi) {

2643if (!HasInv2Pi)

2644returnfalse;

2645if (isInlinableIntLiteral(Literal))

2646returntrue;

2647uint16_t Val =static_cast<uint16_t>(Literal);

2648return Val == 0x3F00 ||// 0.5

2649 Val == 0xBF00 ||// -0.5

2650 Val == 0x3F80 ||// 1.0

2651 Val == 0xBF80 ||// -1.0

2652 Val == 0x4000 ||// 2.0

2653 Val == 0xC000 ||// -2.0

2654 Val == 0x4080 ||// 4.0

2655 Val == 0xC080 ||// -4.0

2656 Val == 0x3E22;// 1.0 / (2.0 * pi)

2657}

2658

2659boolisInlinableLiteralI16(int32_tLiteral,bool HasInv2Pi) {

2660returnisInlinableLiteral32(Literal, HasInv2Pi);

2661}

2662

2663boolisInlinableLiteralFP16(int16_tLiteral,bool HasInv2Pi) {

2664if (!HasInv2Pi)

2665returnfalse;

2666if (isInlinableIntLiteral(Literal))

2667returntrue;

2668uint16_t Val =static_cast<uint16_t>(Literal);

2669return Val == 0x3C00 ||// 1.0

2670 Val == 0xBC00 ||// -1.0

2671 Val == 0x3800 ||// 0.5

2672 Val == 0xB800 ||// -0.5

2673 Val == 0x4000 ||// 2.0

2674 Val == 0xC000 ||// -2.0

2675 Val == 0x4400 ||// 4.0

2676 Val == 0xC400 ||// -4.0

2677 Val == 0x3118;// 1/2pi

2678}

2679

/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// operand, if one exists.
///
/// \param IsFloat Selects the half-precision constant table; when false the
/// single-precision table is used.
/// \returns 128..192 for the integers 0..64, 193..208 for -1..-16, 240..248
/// for the floating-point constants of the selected table, and std::nullopt
/// otherwise.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  const int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;
  if (Signed >= -16 && Signed <= -1)
    return 192 - Signed;

  // Both tables are in encoding order: entry I maps to encoding 240 + I
  // (0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi)).
  // clang-format off
  static constexpr uint32_t HalfConsts[] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000, 0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SingleConsts[] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};
  // clang-format on

  const uint32_t *Consts = IsFloat ? HalfConsts : SingleConsts;
  for (unsigned I = 0; I != 9; ++I)
    if (Consts[I] == Literal)
      return 240 + I;

  return std::nullopt;
}

2732

2733// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction

2734// or nullopt.

2735std::optional<unsigned>getInlineEncodingV2I16(uint32_t Literal) {

2736returngetInlineEncodingV216(false,Literal);

2737}

2738

/// \returns The inline-constant encoding of \p Literal for a V_PK_*_BF16
/// instruction, or std::nullopt if \p Literal has none.
///
/// The result is 128..192 for the integers 0..64, 193..208 for -1..-16, and
/// 240..248 for the bfloat16 constants listed below.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;
  if (Signed >= -16 && Signed <= -1)
    return 192 - Signed;

  // Table is in encoding order: entry I maps to encoding 240 + I
  // (0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi)).
  // clang-format off
  static constexpr uint32_t BF16Consts[] = {
      0x3F00, 0xBF00, 0x3F80, 0xBF80, 0x4000, 0xC000, 0x4080, 0xC080, 0x3E22};
  // clang-format on

  for (unsigned I = 0; I != 9; ++I)
    if (BF16Consts[I] == Literal)
      return 240 + I;

  return std::nullopt;
}

2766

2767// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction

2768// or nullopt.

2769std::optional<unsigned>getInlineEncodingV2F16(uint32_t Literal) {

2770returngetInlineEncodingV216(true,Literal);

2771}

2772

2773// Whether the given literal can be inlined for a V_PK_* instruction.

2774boolisInlinableLiteralV216(uint32_t Literal,uint8_t OpType) {

2775switch (OpType) {

2776caseAMDGPU::OPERAND_REG_IMM_V2INT16:

2777caseAMDGPU::OPERAND_REG_INLINE_C_V2INT16:

2778caseAMDGPU::OPERAND_REG_INLINE_AC_V2INT16:

2779returngetInlineEncodingV216(false,Literal).has_value();

2780caseAMDGPU::OPERAND_REG_IMM_V2FP16:

2781caseAMDGPU::OPERAND_REG_INLINE_C_V2FP16:

2782caseAMDGPU::OPERAND_REG_INLINE_AC_V2FP16:

2783returngetInlineEncodingV216(true,Literal).has_value();

2784caseAMDGPU::OPERAND_REG_IMM_V2BF16:

2785caseAMDGPU::OPERAND_REG_INLINE_C_V2BF16:

2786caseAMDGPU::OPERAND_REG_INLINE_AC_V2BF16:

2787returnisInlinableLiteralV2BF16(Literal);

2788default:

2789llvm_unreachable("bad packed operand type");

2790 }

2791}

2792

2793// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.

2794boolisInlinableLiteralV2I16(uint32_t Literal) {

2795returngetInlineEncodingV2I16(Literal).has_value();

2796}

2797

2798// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.

2799boolisInlinableLiteralV2BF16(uint32_t Literal) {

2800returngetInlineEncodingV2BF16(Literal).has_value();

2801}

2802

2803// Whether the given literal can be inlined for a V_PK_*_F16 instruction.

2804boolisInlinableLiteralV2F16(uint32_t Literal) {

2805returngetInlineEncodingV2F16(Literal).has_value();

2806}

2807

2808boolisValid32BitLiteral(uint64_t Val,bool IsFP64) {

2809if (IsFP64)

2810return !(Val & 0xffffffffu);

2811

2812return isUInt<32>(Val) || isInt<32>(Val);

2813}

2814

2815boolisArgPassedInSGPR(constArgument *A) {

2816constFunction *F =A->getParent();

2817

2818// Arguments to compute shaders are never a source of divergence.

2819CallingConv::ID CC =F->getCallingConv();

2820switch (CC) {

2821caseCallingConv::AMDGPU_KERNEL:

2822caseCallingConv::SPIR_KERNEL:

2823returntrue;

2824caseCallingConv::AMDGPU_VS:

2825caseCallingConv::AMDGPU_LS:

2826caseCallingConv::AMDGPU_HS:

2827caseCallingConv::AMDGPU_ES:

2828caseCallingConv::AMDGPU_GS:

2829caseCallingConv::AMDGPU_PS:

2830caseCallingConv::AMDGPU_CS:

2831caseCallingConv::AMDGPU_Gfx:

2832caseCallingConv::AMDGPU_CS_Chain:

2833caseCallingConv::AMDGPU_CS_ChainPreserve:

2834// For non-compute shaders, SGPR inputs are marked with either inreg or

2835// byval. Everything else is in VGPRs.

2836returnA->hasAttribute(Attribute::InReg) ||

2837A->hasAttribute(Attribute::ByVal);

2838default:

2839// TODO: treat i1 as divergent?

2840returnA->hasAttribute(Attribute::InReg);

2841 }

2842}

2843

2844boolisArgPassedInSGPR(constCallBase *CB,unsigned ArgNo) {

2845// Arguments to compute shaders are never a source of divergence.

2846CallingConv::ID CC = CB->getCallingConv();

2847switch (CC) {

2848caseCallingConv::AMDGPU_KERNEL:

2849caseCallingConv::SPIR_KERNEL:

2850returntrue;

2851caseCallingConv::AMDGPU_VS:

2852caseCallingConv::AMDGPU_LS:

2853caseCallingConv::AMDGPU_HS:

2854caseCallingConv::AMDGPU_ES:

2855caseCallingConv::AMDGPU_GS:

2856caseCallingConv::AMDGPU_PS:

2857caseCallingConv::AMDGPU_CS:

2858caseCallingConv::AMDGPU_Gfx:

2859caseCallingConv::AMDGPU_CS_Chain:

2860caseCallingConv::AMDGPU_CS_ChainPreserve:

2861// For non-compute shaders, SGPR inputs are marked with either inreg or

2862// byval. Everything else is in VGPRs.

2863return CB->paramHasAttr(ArgNo, Attribute::InReg) ||

2864 CB->paramHasAttr(ArgNo, Attribute::ByVal);

2865default:

2866return CB->paramHasAttr(ArgNo, Attribute::InReg);

2867 }

2868}

2869

2870staticboolhasSMEMByteOffset(constMCSubtargetInfo &ST) {

2871returnisGCN3Encoding(ST) ||isGFX10Plus(ST);

2872}

2873

2874boolisLegalSMRDEncodedUnsignedOffset(constMCSubtargetInfo &ST,

2875 int64_t EncodedOffset) {

2876if (isGFX12Plus(ST))

2877return isUInt<23>(EncodedOffset);

2878

2879returnhasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)

2880 : isUInt<8>(EncodedOffset);

2881}

2882

2883boolisLegalSMRDEncodedSignedOffset(constMCSubtargetInfo &ST,

2884 int64_t EncodedOffset,

2885bool IsBuffer) {

2886if (isGFX12Plus(ST))

2887return isInt<24>(EncodedOffset);

2888

2889return !IsBuffer &&

2890hasSMRDSignedImmOffset(ST) &&

2891 isInt<21>(EncodedOffset);

2892}

2893

/// \returns true if \p ByteOffset is a multiple of 4.
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}

2897

2898uint64_t convertSMRDOffsetUnits(constMCSubtargetInfo &ST,

2899uint64_t ByteOffset) {

2900if (hasSMEMByteOffset(ST))

2901return ByteOffset;

2902

2903assert(isDwordAligned(ByteOffset));

2904return ByteOffset >> 2;

2905}

2906

2907std::optional<int64_t>getSMRDEncodedOffset(constMCSubtargetInfo &ST,

2908 int64_t ByteOffset,bool IsBuffer,

2909bool HasSOffset) {

2910// For unbuffered smem loads, it is illegal for the Immediate Offset to be

2911// negative if the resulting (Offset + (M0 or SOffset or zero) is negative.

2912// Handle case where SOffset is not present.

2913if (!IsBuffer && !HasSOffset && ByteOffset < 0 &&hasSMRDSignedImmOffset(ST))

2914return std::nullopt;

2915

2916if (isGFX12Plus(ST))// 24 bit signed offsets

2917return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)

2918 : std::nullopt;

2919

2920// The signed version is always a byte offset.

2921if (!IsBuffer &&hasSMRDSignedImmOffset(ST)) {

2922assert(hasSMEMByteOffset(ST));

2923return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)

2924 : std::nullopt;

2925 }

2926

2927if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))

2928return std::nullopt;

2929

2930 int64_t EncodedOffset =convertSMRDOffsetUnits(ST, ByteOffset);

2931returnisLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)

2932 ? std::optional<int64_t>(EncodedOffset)

2933 : std::nullopt;

2934}

2935

2936std::optional<int64_t>getSMRDEncodedLiteralOffset32(constMCSubtargetInfo &ST,

2937 int64_t ByteOffset) {

2938if (!isCI(ST) || !isDwordAligned(ByteOffset))

2939return std::nullopt;

2940

2941 int64_t EncodedOffset =convertSMRDOffsetUnits(ST, ByteOffset);

2942return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)

2943 : std::nullopt;

2944}

2945

2946unsignedgetNumFlatOffsetBits(constMCSubtargetInfo &ST) {

2947if (AMDGPU::isGFX10(ST))

2948return 12;

2949

2950if (AMDGPU::isGFX12(ST))

2951return 24;

2952return 13;

2953}

2954

// File-local declarations for the searchable tables pulled in from
// AMDGPUGenSearchableTables.inc at the end of this namespace.
2955namespace{

2956

// Table row type: holds a single intrinsic ID.
2957structSourceOfDivergence {

2958unsignedIntr;

2959};

// Declared here, not defined; isIntrinsicSourceOfDivergence() treats a
// non-null result as "listed". NOTE(review): the definition presumably comes
// from the generated include below -- confirm.
2960const SourceOfDivergence *lookupSourceOfDivergence(unsignedIntr);

2961

// Table row type: holds a single intrinsic ID.
2962structAlwaysUniform {

2963unsignedIntr;

2964};

// Declared here, not defined; isIntrinsicAlwaysUniform() treats a non-null
// result as "listed". NOTE(review): the definition presumably comes from the
// generated include below -- confirm.
2965constAlwaysUniform *lookupAlwaysUniform(unsignedIntr);

2966

// NOTE(review): these macros presumably select which implementation sections
// of the generated include are emitted (the two intrinsic tables plus the
// Gfx9/Gfx10/Gfx11Plus buffer-format lookups used by
// getGcnBufferFormatInfo()) -- verify against the generated file.
2967#define GET_SourcesOfDivergence_IMPL

2968#define GET_UniformIntrinsics_IMPL

2969#define GET_Gfx9BufferFormat_IMPL

2970#define GET_Gfx10BufferFormat_IMPL

2971#define GET_Gfx11PlusBufferFormat_IMPL

2972

// Must follow the struct declarations and macro definitions above.
2973#include "AMDGPUGenSearchableTables.inc"

2974

2975}// end anonymous namespace

2976

2977boolisIntrinsicSourceOfDivergence(unsigned IntrID) {

2978return lookupSourceOfDivergence(IntrID);

2979}

2980

2981boolisIntrinsicAlwaysUniform(unsigned IntrID) {

2982return lookupAlwaysUniform(IntrID);

2983}

2984

2985constGcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,

2986uint8_t NumComponents,

2987uint8_t NumFormat,

2988constMCSubtargetInfo &STI) {

2989returnisGFX11Plus(STI)

2990 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,

2991 NumFormat)

2992 :isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,

2993 NumComponents, NumFormat)

2994 : getGfx9BufferFormatInfo(BitsPerComp,

2995 NumComponents, NumFormat);

2996}

2997

2998constGcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,

2999constMCSubtargetInfo &STI) {

3000returnisGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)

3001 :isGFX10(STI) ? getGfx10BufferFormatInfo(Format)

3002 : getGfx9BufferFormatInfo(Format);

3003}

3004

3005boolhasAny64BitVGPROperands(constMCInstrDesc &OpDesc) {

3006for (autoOpName : { OpName::vdst, OpName::src0, OpName::src1,

3007 OpName::src2 }) {

3008intIdx =getNamedOperandIdx(OpDesc.getOpcode(),OpName);

3009if (Idx == -1)

3010continue;

3011

3012if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||

3013 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)

3014returntrue;

3015 }

3016

3017returnfalse;

3018}

3019

3020boolisDPALU_DPP(constMCInstrDesc &OpDesc) {

3021returnhasAny64BitVGPROperands(OpDesc);

3022}

3023

3024unsignedgetLdsDwGranularity(constMCSubtargetInfo &ST) {

3025// Currently this is 128 for all subtargets

3026return 128;

3027}

3028

3029}// namespace AMDGPU

3030

3031raw_ostream &operator<<(raw_ostream &OS,

3032constAMDGPU::IsaInfo::TargetIDSetting S) {

3033switch (S) {

3034case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):

3035OS <<"Unsupported";

3036break;

3037case (AMDGPU::IsaInfo::TargetIDSetting::Any):

3038OS <<"Any";

3039break;

3040case (AMDGPU::IsaInfo::TargetIDSetting::Off):

3041OS <<"Off";

3042break;

3043case (AMDGPU::IsaInfo::TargetIDSetting::On):

3044OS <<"On";

3045break;

3046 }

3047returnOS;

3048}

3049

3050}// namespace llvm

MRI

unsigned const MachineRegisterInfo * MRI

Definition:AArch64AdvSIMDScalarPass.cpp:105

AMDGPUAsmUtils.h

MAP_REG2REG

#define MAP_REG2REG

Definition:AMDGPUBaseInfo.cpp:2292

Intr

unsigned Intr

Definition:AMDGPUBaseInfo.cpp:2958

DefaultAMDHSACodeObjectVersion

static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))

AMDGPUBaseInfo.h

AMDGPUMCTargetDesc.h

Provides AMDGPU specific target descriptions.

AMDGPU.h

AMDKernelCodeTUtils.h

MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.

AMDKernelCodeT.h

AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32

@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32

Definition:AMDKernelCodeT.h:127

Attributes.h

This file contains the simple types necessary to represent the attributes associated with functions a...

ELF.h

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Info

Analysis containing CSE Info

Definition:CSEInfo.cpp:27

CommandLine.h

Constants.h

This file contains the declarations for the subclasses of Constant, which represent the different fla...

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

Name

std::string Name

Definition:ELFObjHandler.cpp:77

Index

uint32_t Index

Definition:ELFObjHandler.cpp:83

Size

uint64_t Size

Definition:ELFObjHandler.cpp:81

#define F(x, y, z)

Definition:MD5.cpp:55

TRI

unsigned const TargetRegisterInfo * TRI

Definition:MachineSink.cpp:2029

Reg

unsigned Reg

Definition:MachineSink.cpp:2028

Signed

@ Signed

Definition:NVPTXISelLowering.cpp:4789

auto CC

Definition:RISCVRedundantCopyElimination.cpp:79

S_00B848_MEM_ORDERED

#define S_00B848_MEM_ORDERED(x)

Definition:SIDefines.h:1193

S_00B848_WGP_MODE

#define S_00B848_WGP_MODE(x)

Definition:SIDefines.h:1190

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

DefaultVal

unsigned unsigned DefaultVal

Definition:SPIRVModuleAnalysis.cpp:64

raw_pwrite_stream & OS

Definition:SampleProfWriter.cpp:51

StringExtras.h

This file contains some functions that are useful when dealing with strings.

TargetParser.h

llvm::AMDGPUSubtarget::Generation

Generation

Definition:AMDGPUSubtarget.h:31

llvm::AMDGPUSubtarget::GFX10

@ GFX10

Definition:AMDGPUSubtarget.h:41

llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS

@ VOLCANIC_ISLANDS

Definition:AMDGPUSubtarget.h:39

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccSupported

bool isSramEccSupported() const

Definition:AMDGPUBaseInfo.h:180

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setTargetIDFromFeaturesString

void setTargetIDFromFeaturesString(StringRef FS)

Definition:AMDGPUBaseInfo.cpp:815

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::getXnackSetting

TargetIDSetting getXnackSetting() const

Definition:AMDGPUBaseInfo.h:170

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::AMDGPUTargetID

AMDGPUTargetID(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:806

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackSupported

bool isXnackSupported() const

Definition:AMDGPUBaseInfo.h:151

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setTargetIDFromTargetIDStream

void setTargetIDFromTargetIDStream(StringRef TargetID)

Definition:AMDGPUBaseInfo.cpp:883

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::toString

std::string toString() const

Definition:AMDGPUBaseInfo.cpp:895

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::getSramEccSetting

TargetIDSetting getSramEccSetting() const

Definition:AMDGPUBaseInfo.h:199

llvm::AMDGPU::VOPD::ComponentInfo

Definition:AMDGPUBaseInfo.h:811

llvm::AMDGPU::VOPD::ComponentInfo::getIndexInParsedOperands

unsigned getIndexInParsedOperands(unsigned CompOprIdx) const

Definition:AMDGPUBaseInfo.cpp:727

llvm::AMDGPU::VOPD::ComponentLayout::getIndexOfDstInParsedOperands

unsigned getIndexOfDstInParsedOperands() const

Definition:AMDGPUBaseInfo.h:791

llvm::AMDGPU::VOPD::ComponentLayout::getIndexOfSrcInParsedOperands

unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const

Definition:AMDGPUBaseInfo.h:796

llvm::AMDGPU::VOPD::ComponentProps::ComponentProps

ComponentProps()=default

llvm::AMDGPU::VOPD::ComponentProps::getCompParsedSrcOperandsNum

unsigned getCompParsedSrcOperandsNum() const

Definition:AMDGPUBaseInfo.h:677

llvm::AMDGPU::VOPD::InstInfo

Definition:AMDGPUBaseInfo.h:828

llvm::AMDGPU::VOPD::InstInfo::getInvalidCompOperandIndex

std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const

Definition:AMDGPUBaseInfo.cpp:741

llvm::AMDGPU::VOPD::InstInfo::RegIndices

std::array< unsigned, Component::MAX_OPR_NUM > RegIndices

Definition:AMDGPUBaseInfo.h:833

llvm::Any

Definition:Any.h:28

llvm::Argument

This class represents an incoming formal argument to a Function.

Definition:Argument.h:31

llvm::Attribute

Definition:Attributes.h:67

llvm::CallBase

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

Definition:InstrTypes.h:1112

llvm::CallBase::getCallingConv

CallingConv::ID getCallingConv() const

Definition:InstrTypes.h:1399

llvm::CallBase::paramHasAttr

bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const

Determine whether the argument or parameter has the given attribute.

Definition:Instructions.cpp:409

llvm::DWARFExpression::Operation

This class represents an Operation in the Expression.

Definition:DWARFExpression.h:32

llvm::FeatureBitset::test

constexpr bool test(unsigned I) const

Definition:SubtargetFeature.h:82

llvm::Function

Definition:Function.h:63

llvm::GlobalValue

Definition:GlobalValue.h:48

llvm::GlobalValue::getAddressSpace

unsigned getAddressSpace() const

Definition:GlobalValue.h:206

llvm::LLVMContext

This is an important class for using LLVM in a threaded context.

Definition:LLVMContext.h:67

llvm::LLVMContext::emitError

void emitError(const Instruction *I, const Twine &ErrorStr)

emitError - Emit an error message to the currently installed error handler with optional location inf...

Definition:LLVMContext.cpp:210

llvm::MCInstrDesc

Describe properties that are true of each instruction in the target description file.

Definition:MCInstrDesc.h:198

llvm::MCInstrDesc::getNumOperands

unsigned getNumOperands() const

Return the number of declared MachineOperands for this MachineInstruction.

Definition:MCInstrDesc.h:237

llvm::MCInstrDesc::operands

ArrayRef< MCOperandInfo > operands() const

Definition:MCInstrDesc.h:239

llvm::MCInstrDesc::getNumDefs

unsigned getNumDefs() const

Return the number of MachineOperands that are register definitions.

Definition:MCInstrDesc.h:248

llvm::MCInstrDesc::getOperandConstraint

int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const

Returns the value of the specified operand constraint if it is present.

Definition:MCInstrDesc.h:219

llvm::MCInstrDesc::getOpcode

unsigned getOpcode() const

Return the opcode number for this descriptor.

Definition:MCInstrDesc.h:230

llvm::MCInstrInfo

Interface to description of machine instruction set.

Definition:MCInstrInfo.h:26

llvm::MCInstrInfo::get

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

Definition:MCInstrInfo.h:63

llvm::MCRegisterClass

MCRegisterClass - Base class of TargetRegisterClass.

Definition:MCRegisterInfo.h:35

llvm::MCRegisterClass::getID

unsigned getID() const

getID() - Return the register class ID number.

Definition:MCRegisterInfo.h:53

llvm::MCRegisterClass::contains

bool contains(MCRegister Reg) const

contains - Return true if the specified register is included in this register class.

Definition:MCRegisterInfo.h:73

llvm::MCRegisterInfo

MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...

Definition:MCRegisterInfo.h:149

llvm::MCRegister

Wrapper class representing physical registers. Should be passed by value.

Definition:MCRegister.h:33

llvm::MCSubtargetInfo

Generic base class for all target subtargets.

Definition:MCSubtargetInfo.h:76

llvm::MCSubtargetInfo::hasFeature

bool hasFeature(unsigned Feature) const

Definition:MCSubtargetInfo.h:121

llvm::MCSubtargetInfo::getTargetTriple

const Triple & getTargetTriple() const

Definition:MCSubtargetInfo.h:110

llvm::MCSubtargetInfo::getFeatureBits

const FeatureBitset & getFeatureBits() const

Definition:MCSubtargetInfo.h:114

llvm::MCSubtargetInfo::getCPU

StringRef getCPU() const

Definition:MCSubtargetInfo.h:111

llvm::Module

A Module instance is used to store all the information related to an LLVM module.

Definition:Module.h:65

llvm::SmallVector

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Definition:SmallVector.h:1196

llvm::StringLiteral

A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...

Definition:StringRef.h:853

llvm::StringRef

StringRef - Represent a constant reference to a string, i.e.

Definition:StringRef.h:51

llvm::StringRef::split

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

Definition:StringRef.h:700

llvm::StringRef::getAsInteger

bool getAsInteger(unsigned Radix, T &Result) const

Parse the current string as an integer of the specified radix.

Definition:StringRef.h:470

llvm::StringRef::str

std::string str() const

str - Get the contents as an std::string.

Definition:StringRef.h:229

llvm::StringRef::empty

constexpr bool empty() const

empty - Check if the string is empty.

Definition:StringRef.h:147

llvm::StringRef::size

constexpr size_t size() const

size - Get the string size.

Definition:StringRef.h:150

llvm::StringRef::ends_with

bool ends_with(StringRef Suffix) const

Check if this string ends with the given Suffix.

Definition:StringRef.h:277

llvm::SubtargetFeatures

Manages the enabling and disabling of subtarget specific features.

Definition:SubtargetFeature.h:174

llvm::SubtargetFeatures::getFeatures

const std::vector< std::string > & getFeatures() const

Returns the vector of individual subtarget features.

Definition:SubtargetFeature.h:189

llvm::Triple

Triple - Helper class for working with autoconf configuration names.

Definition:Triple.h:44

llvm::Triple::AMDHSA

@ AMDHSA

Definition:Triple.h:223

llvm::Triple::r600

@ r600

Definition:Triple.h:73

llvm::Triple::amdgcn

@ amdgcn

Definition:Triple.h:74

llvm::Triple::getOS

OSType getOS() const

Get the parsed operating system type of this triple.

Definition:Triple.h:404

llvm::Triple::getArch

ArchType getArch() const

Get the parsed architecture type of this triple.

Definition:Triple.h:395

llvm::Twine

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

Definition:Twine.h:81

llvm::cl::opt

Definition:CommandLine.h:1423

llvm::raw_ostream

This class implements an extremely fast bulk output stream that can only output to a stream.

Definition:raw_ostream.h:52

llvm::raw_string_ostream

A raw_ostream that writes to an std::string.

Definition:raw_ostream.h:661

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Definition:ErrorHandling.h:143

OpName

Definition:R600Defines.h:62

llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

Definition:AMDGPUAddrSpace.h:38

llvm::AMDGPUAS::LOCAL_ADDRESS

@ LOCAL_ADDRESS

Address space for local memory.

Definition:AMDGPUAddrSpace.h:35

llvm::AMDGPUAS::CONSTANT_ADDRESS

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

Definition:AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::GLOBAL_ADDRESS

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

Definition:AMDGPUAddrSpace.h:31

llvm::AMDGPU::DepCtr::decodeDepCtr

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1698

llvm::AMDGPU::DepCtr::encodeFieldVaVdst

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)

Definition:AMDGPUBaseInfo.cpp:1730

llvm::AMDGPU::DepCtr::decodeFieldSaSdst

unsigned decodeFieldSaSdst(unsigned Encoded)

Definition:AMDGPUBaseInfo.cpp:1718

llvm::AMDGPU::DepCtr::encodeFieldVmVsrc

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)

Definition:AMDGPUBaseInfo.cpp:1722

llvm::AMDGPU::DepCtr::DEP_CTR_SIZE

const int DEP_CTR_SIZE

Definition:AMDGPUAsmUtils.cpp:83

llvm::AMDGPU::DepCtr::encodeDepCtr

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1704

llvm::AMDGPU::DepCtr::encodeFieldSaSdst

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)

Definition:AMDGPUBaseInfo.cpp:1738

llvm::AMDGPU::DepCtr::DepCtrInfo

const CustomOperandVal DepCtrInfo[]

Definition:AMDGPUAsmUtils.cpp:71

llvm::AMDGPU::DepCtr::isSymbolicDepCtrEncoding

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1692

llvm::AMDGPU::DepCtr::decodeFieldVaVdst

unsigned decodeFieldVaVdst(unsigned Encoded)

Definition:AMDGPUBaseInfo.cpp:1714

llvm::AMDGPU::DepCtr::getDefaultDepCtrEncoding

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1685

llvm::AMDGPU::DepCtr::decodeFieldVmVsrc

unsigned decodeFieldVmVsrc(unsigned Encoded)

Definition:AMDGPUBaseInfo.cpp:1710

llvm::AMDGPU::Exp::isSupportedTgtId

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1804

llvm::AMDGPU::Exp::ExpTgtInfo

static constexpr ExpTgt ExpTgtInfo[]

Definition:AMDGPUBaseInfo.cpp:1760

llvm::AMDGPU::Exp::getTgtName

bool getTgtName(unsigned Id, StringRef &Name, int &Index)

Definition:AMDGPUBaseInfo.cpp:1770

llvm::AMDGPU::Exp::getTgtId

unsigned getTgtId(const StringRef Name)

Definition:AMDGPUBaseInfo.cpp:1781

llvm::AMDGPU::Exp::ET_DUAL_SRC_BLEND_MAX_IDX

@ ET_DUAL_SRC_BLEND_MAX_IDX

Definition:SIDefines.h:1016

llvm::AMDGPU::Exp::ET_PARAM_MAX_IDX

@ ET_PARAM_MAX_IDX

Definition:SIDefines.h:1017

llvm::AMDGPU::Exp::ET_NULL

@ ET_NULL

Definition:SIDefines.h:1000

llvm::AMDGPU::Exp::ET_PRIM_MAX_IDX

@ ET_PRIM_MAX_IDX

Definition:SIDefines.h:1013

llvm::AMDGPU::Exp::ET_MRT0

@ ET_MRT0

Definition:SIDefines.h:997

llvm::AMDGPU::Exp::ET_DUAL_SRC_BLEND1

@ ET_DUAL_SRC_BLEND1

Definition:SIDefines.h:1007

llvm::AMDGPU::Exp::ET_INVALID

@ ET_INVALID

Definition:SIDefines.h:1019

llvm::AMDGPU::Exp::ET_PRIM

@ ET_PRIM

Definition:SIDefines.h:1005

llvm::AMDGPU::Exp::ET_PARAM31

@ ET_PARAM31

Definition:SIDefines.h:1009

llvm::AMDGPU::Exp::ET_POS4

@ ET_POS4

Definition:SIDefines.h:1003

llvm::AMDGPU::Exp::ET_MRT_MAX_IDX

@ ET_MRT_MAX_IDX

Definition:SIDefines.h:1014

llvm::AMDGPU::Exp::ET_POS0

@ ET_POS0

Definition:SIDefines.h:1001

llvm::AMDGPU::Exp::ET_DUAL_SRC_BLEND0

@ ET_DUAL_SRC_BLEND0

Definition:SIDefines.h:1006

llvm::AMDGPU::Exp::ET_MRTZ_MAX_IDX

@ ET_MRTZ_MAX_IDX

Definition:SIDefines.h:1012

llvm::AMDGPU::Exp::ET_MRTZ

@ ET_MRTZ

Definition:SIDefines.h:999

llvm::AMDGPU::Exp::ET_NULL_MAX_IDX

@ ET_NULL_MAX_IDX

Definition:SIDefines.h:1011

llvm::AMDGPU::Exp::ET_POS_MAX_IDX

@ ET_POS_MAX_IDX

Definition:SIDefines.h:1015

llvm::AMDGPU::Exp::ET_PARAM0

@ ET_PARAM0

Definition:SIDefines.h:1008

llvm::AMDGPU::HSAMD::V3::VersionMajor

constexpr uint32_t VersionMajor

HSA metadata major version.

Definition:AMDGPUMetadata.h:460

llvm::AMDGPU::HWEncoding::IS_HI16

@ IS_HI16

Definition:SIDefines.h:374

llvm::AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET

@ COMPLETION_ACTION_OFFSET

Definition:SIDefines.h:1042

llvm::AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET

@ HOSTCALL_PTR_OFFSET

Definition:SIDefines.h:1037

llvm::AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET

@ DEFAULT_QUEUE_OFFSET

Definition:SIDefines.h:1041

llvm::AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET

@ MULTIGRID_SYNC_ARG_OFFSET

Definition:SIDefines.h:1038

llvm::AMDGPU::IsaInfo::getVGPREncodingGranule

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)

Definition:AMDGPUBaseInfo.cpp:1157

llvm::AMDGPU::IsaInfo::getTotalNumVGPRs

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1169

llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)

Definition:AMDGPUBaseInfo.cpp:1011

llvm::AMDGPU::IsaInfo::getWavefrontSize

unsigned getWavefrontSize(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:937

llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)

Definition:AMDGPUBaseInfo.cpp:979

llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1021

llvm::AMDGPU::IsaInfo::getMaxWavesPerEU

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1002

llvm::AMDGPU::IsaInfo::getWavesPerWorkGroup

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)

Definition:AMDGPUBaseInfo.cpp:1026

llvm::AMDGPU::IsaInfo::getNumExtraSGPRs

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)

Definition:AMDGPUBaseInfo.cpp:1097

llvm::AMDGPU::IsaInfo::getSGPREncodingGranule

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1040

llvm::AMDGPU::IsaInfo::getLocalMemorySize

unsigned getLocalMemorySize(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:946

llvm::AMDGPU::IsaInfo::getAddressableLocalMemorySize

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:958

llvm::AMDGPU::IsaInfo::getMinNumVGPRs

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)

Definition:AMDGPUBaseInfo.cpp:1231

llvm::AMDGPU::IsaInfo::getEUsPerCU

unsigned getEUsPerCU(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:968

llvm::AMDGPU::IsaInfo::TRAP_NUM_SGPRS

@ TRAP_NUM_SGPRS

Definition:AMDGPUBaseInfo.h:130

llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG

@ FIXED_NUM_SGPRS_FOR_INIT_BUG

Definition:AMDGPUBaseInfo.h:129

llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1051

llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1182

llvm::AMDGPU::IsaInfo::getMinNumSGPRs

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)

Definition:AMDGPUBaseInfo.cpp:1063

llvm::AMDGPU::IsaInfo::getTargetIDSettingFromFeatureString

static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)

Definition:AMDGPUBaseInfo.cpp:874

llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1017

llvm::AMDGPU::IsaInfo::getMaxNumSGPRs

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)

Definition:AMDGPUBaseInfo.cpp:1080

llvm::AMDGPU::IsaInfo::getNumSGPRBlocks

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)

Definition:AMDGPUBaseInfo.cpp:1133

llvm::AMDGPU::IsaInfo::getMinWavesPerEU

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:998

llvm::AMDGPU::IsaInfo::getSGPRAllocGranule

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1031

llvm::AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)

Definition:AMDGPUBaseInfo.cpp:1188

llvm::AMDGPU::IsaInfo::TargetIDSetting

TargetIDSetting

Definition:AMDGPUBaseInfo.h:133

llvm::AMDGPU::IsaInfo::TargetIDSetting::On

@ On

llvm::AMDGPU::IsaInfo::TargetIDSetting::Unsupported

@ Unsupported

llvm::AMDGPU::IsaInfo::TargetIDSetting::Off

@ Off

llvm::AMDGPU::IsaInfo::TargetIDSetting::Any

@ Any

llvm::AMDGPU::IsaInfo::getMaxNumVGPRs

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)

Definition:AMDGPUBaseInfo.cpp:1255

llvm::AMDGPU::IsaInfo::getEncodedNumVGPRBlocks

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)

Definition:AMDGPUBaseInfo.cpp:1264

llvm::AMDGPU::IsaInfo::getOccupancyWithNumSGPRs

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)

Definition:AMDGPUBaseInfo.cpp:1204

llvm::AMDGPU::IsaInfo::getGranulatedNumRegisterBlocks

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)

Definition:AMDGPUBaseInfo.cpp:1128

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)

Definition:AMDGPUBaseInfo.cpp:1139

llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1180

llvm::AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)

Definition:AMDGPUBaseInfo.cpp:1271

llvm::AMDGPU::IsaInfo::getTotalNumSGPRs

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1044

llvm::AMDGPU::MFMAScaleFormats::FP8_E5M2

@ FP8_E5M2

Definition:SIDefines.h:1056

llvm::AMDGPU::MFMAScaleFormats::FP4_E2M1

@ FP4_E2M1

Definition:SIDefines.h:1059

llvm::AMDGPU::MFMAScaleFormats::FP8_E4M3

@ FP8_E4M3

Definition:SIDefines.h:1055

llvm::AMDGPU::MFMAScaleFormats::FP6_E3M2

@ FP6_E3M2

Definition:SIDefines.h:1058

llvm::AMDGPU::MFMAScaleFormats::FP6_E2M3

@ FP6_E2M3

Definition:SIDefines.h:1057

llvm::AMDGPU::MTBUFFormat::UfmtSymbolicGFX11

StringLiteral const UfmtSymbolicGFX11[]

Definition:AMDGPUAsmUtils.cpp:484

llvm::AMDGPU::MTBUFFormat::DFMT_MIN

@ DFMT_MIN

Definition:SIDefines.h:601

llvm::AMDGPU::MTBUFFormat::DFMT_UNDEF

@ DFMT_UNDEF

Definition:SIDefines.h:604

llvm::AMDGPU::MTBUFFormat::DFMT_MASK

@ DFMT_MASK

Definition:SIDefines.h:608

llvm::AMDGPU::MTBUFFormat::DFMT_MAX

@ DFMT_MAX

Definition:SIDefines.h:602

llvm::AMDGPU::MTBUFFormat::DFMT_SHIFT

@ DFMT_SHIFT

Definition:SIDefines.h:607

llvm::AMDGPU::MTBUFFormat::isValidUnifiedFormat

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1905

llvm::AMDGPU::MTBUFFormat::getDefaultFormatEncoding

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1930

llvm::AMDGPU::MTBUFFormat::getUnifiedFormatName

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1899

llvm::AMDGPU::MTBUFFormat::DfmtNfmt2UFmtGFX10

unsigned const DfmtNfmt2UFmtGFX10[]

Definition:AMDGPUAsmUtils.cpp:390

llvm::AMDGPU::MTBUFFormat::DfmtSymbolic

StringLiteral const DfmtSymbolic[]

Definition:AMDGPUAsmUtils.cpp:244

llvm::AMDGPU::MTBUFFormat::getNfmtLookupTable

static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1842

llvm::AMDGPU::MTBUFFormat::NFMT_UNDEF

@ NFMT_UNDEF

Definition:SIDefines.h:625

llvm::AMDGPU::MTBUFFormat::NFMT_SHIFT

@ NFMT_SHIFT

Definition:SIDefines.h:628

llvm::AMDGPU::MTBUFFormat::NFMT_MASK

@ NFMT_MASK

Definition:SIDefines.h:629

llvm::AMDGPU::MTBUFFormat::NFMT_MAX

@ NFMT_MAX

Definition:SIDefines.h:623

llvm::AMDGPU::MTBUFFormat::NFMT_MIN

@ NFMT_MIN

Definition:SIDefines.h:622

llvm::AMDGPU::MTBUFFormat::isValidNfmt

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1871

llvm::AMDGPU::MTBUFFormat::NfmtSymbolicGFX10

StringLiteral const NfmtSymbolicGFX10[]

Definition:AMDGPUAsmUtils.cpp:263

llvm::AMDGPU::MTBUFFormat::isValidDfmtNfmt

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1864

llvm::AMDGPU::MTBUFFormat::convertDfmtNfmt2Ufmt

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1909

llvm::AMDGPU::MTBUFFormat::getDfmtName

StringRef getDfmtName(unsigned Id)

Definition:AMDGPUBaseInfo.cpp:1837

llvm::AMDGPU::MTBUFFormat::encodeDfmtNfmt

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)

Definition:AMDGPUBaseInfo.cpp:1875

llvm::AMDGPU::MTBUFFormat::getUnifiedFormat

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1884

llvm::AMDGPU::MTBUFFormat::isValidFormatEncoding

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1926

llvm::AMDGPU::MTBUFFormat::getNfmtName

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1859

llvm::AMDGPU::MTBUFFormat::DFMT_NFMT_DEFAULT

@ DFMT_NFMT_DEFAULT

Definition:SIDefines.h:634

llvm::AMDGPU::MTBUFFormat::DFMT_NFMT_MAX

@ DFMT_NFMT_MAX

Definition:SIDefines.h:640

llvm::AMDGPU::MTBUFFormat::DfmtNfmt2UFmtGFX11

unsigned const DfmtNfmt2UFmtGFX11[]

Definition:AMDGPUAsmUtils.cpp:564

llvm::AMDGPU::MTBUFFormat::NfmtSymbolicVI

StringLiteral const NfmtSymbolicVI[]

Definition:AMDGPUAsmUtils.cpp:285

llvm::AMDGPU::MTBUFFormat::NfmtSymbolicSICI

StringLiteral const NfmtSymbolicSICI[]

Definition:AMDGPUAsmUtils.cpp:274

llvm::AMDGPU::MTBUFFormat::getNfmt

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1850

llvm::AMDGPU::MTBUFFormat::getDfmt

int64_t getDfmt(const StringRef Name)

Definition:AMDGPUBaseInfo.cpp:1829

llvm::AMDGPU::MTBUFFormat::UFMT_UNDEF

@ UFMT_UNDEF

Definition:SIDefines.h:645

llvm::AMDGPU::MTBUFFormat::UFMT_DEFAULT

@ UFMT_DEFAULT

Definition:SIDefines.h:646

llvm::AMDGPU::MTBUFFormat::UFMT_MAX

@ UFMT_MAX

Definition:SIDefines.h:644

llvm::AMDGPU::MTBUFFormat::UfmtSymbolicGFX10

StringLiteral const UfmtSymbolicGFX10[]

Definition:AMDGPUAsmUtils.cpp:296

llvm::AMDGPU::MTBUFFormat::decodeDfmtNfmt

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)

Definition:AMDGPUBaseInfo.cpp:1879

llvm::AMDGPU::SendMsg::encodeMsg

uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)

Definition:AMDGPUBaseInfo.cpp:2014

llvm::AMDGPU::SendMsg::StreamId

StreamId

Definition:SIDefines.h:491

llvm::AMDGPU::SendMsg::STREAM_ID_LAST_

@ STREAM_ID_LAST_

Definition:SIDefines.h:494

llvm::AMDGPU::SendMsg::STREAM_ID_SHIFT_

@ STREAM_ID_SHIFT_

Definition:SIDefines.h:496

llvm::AMDGPU::SendMsg::STREAM_ID_FIRST_

@ STREAM_ID_FIRST_

Definition:SIDefines.h:495

llvm::AMDGPU::SendMsg::STREAM_ID_MASK_

@ STREAM_ID_MASK_

Definition:SIDefines.h:498

llvm::AMDGPU::SendMsg::STREAM_ID_NONE_

@ STREAM_ID_NONE_

Definition:SIDefines.h:492

llvm::AMDGPU::SendMsg::ID_MASK_PreGFX11_

@ ID_MASK_PreGFX11_

Definition:SIDefines.h:467

llvm::AMDGPU::SendMsg::ID_SYSMSG

@ ID_SYSMSG

Definition:SIDefines.h:456

llvm::AMDGPU::SendMsg::ID_GS_PreGFX11

@ ID_GS_PreGFX11

Definition:SIDefines.h:442

llvm::AMDGPU::SendMsg::ID_GS_DONE_PreGFX11

@ ID_GS_DONE_PreGFX11

Definition:SIDefines.h:443

llvm::AMDGPU::SendMsg::ID_MASK_GFX11Plus_

@ ID_MASK_GFX11Plus_

Definition:SIDefines.h:468

llvm::AMDGPU::SendMsg::msgSupportsStream

bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1995

llvm::AMDGPU::SendMsg::decodeMsg

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2002

llvm::AMDGPU::SendMsg::isValidMsgId

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1948

llvm::AMDGPU::SendMsg::OP_GS_NOP

@ OP_GS_NOP

Definition:SIDefines.h:478

llvm::AMDGPU::SendMsg::OP_NONE_

@ OP_NONE_

Definition:SIDefines.h:473

llvm::AMDGPU::SendMsg::OP_SHIFT_

@ OP_SHIFT_

Definition:SIDefines.h:472

llvm::AMDGPU::SendMsg::OP_MASK_

@ OP_MASK_

Definition:SIDefines.h:476

llvm::AMDGPU::SendMsg::isValidMsgStream

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)

Definition:AMDGPUBaseInfo.cpp:1969

llvm::AMDGPU::SendMsg::getMsgOpName

StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)

Map from an encoding to the symbolic name for a sendmsg operation.

Definition:AMDGPUAsmUtils.cpp:153

llvm::AMDGPU::SendMsg::getMsgIdMask

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1944

llvm::AMDGPU::SendMsg::msgRequiresOp

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1989

llvm::AMDGPU::SendMsg::isValidMsgOp

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)

Definition:AMDGPUBaseInfo.cpp:1952

llvm::AMDGPU::UfmtGFX10::UFMT_LAST

@ UFMT_LAST

Definition:SIDefines.h:746

llvm::AMDGPU::UfmtGFX10::UFMT_FIRST

@ UFMT_FIRST

Definition:SIDefines.h:745

llvm::AMDGPU::UfmtGFX11::UFMT_FIRST

@ UFMT_FIRST

Definition:SIDefines.h:831

llvm::AMDGPU::UfmtGFX11::UFMT_LAST

@ UFMT_LAST

Definition:SIDefines.h:832

llvm::AMDGPU::VOPD::VOPD_VGPR_BANK_MASKS

constexpr unsigned VOPD_VGPR_BANK_MASKS[]

Definition:AMDGPUBaseInfo.h:656

llvm::AMDGPU::VOPD::COMPONENTS_NUM

constexpr unsigned COMPONENTS_NUM

Definition:AMDGPUBaseInfo.h:660

llvm::AMDGPU::VOPD::X

@ X

Definition:AMDGPUBaseInfo.h:658

llvm::AMDGPU::VOPD::Y

@ Y

Definition:AMDGPUBaseInfo.h:658

llvm::AMDGPU::VOPD::COMPONENT_X

@ COMPONENT_X

Definition:AMDGPUBaseInfo.h:710

llvm::AMDGPU::VOPD::MAX_SRC_NUM

@ MAX_SRC_NUM

Definition:AMDGPUBaseInfo.h:650

llvm::AMDGPU::VOPD::MAX_OPR_NUM

@ MAX_OPR_NUM

Definition:AMDGPUBaseInfo.h:651

llvm::AMDGPU::VOPD::DST

@ DST

Definition:AMDGPUBaseInfo.h:644

llvm::AMDGPU::VOPD::SRC2

@ SRC2

Definition:AMDGPUBaseInfo.h:647

llvm::AMDGPU::VOPD::DST_NUM

@ DST_NUM

Definition:AMDGPUBaseInfo.h:649

llvm::AMDGPU::VOPD::SRC1

@ SRC1

Definition:AMDGPUBaseInfo.h:646

llvm::AMDGPU::VOPD::SRC0

@ SRC0

Definition:AMDGPUBaseInfo.h:645

llvm::AMDGPU::isGCN3Encoding

bool isGCN3Encoding(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2226

llvm::AMDGPU::isInlinableLiteralBF16

bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)

Definition:AMDGPUBaseInfo.cpp:2642

llvm::AMDGPU::isGFX10_BEncoding

bool isGFX10_BEncoding(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2234

llvm::AMDGPU::isGFX10_GFX11

bool isGFX10_GFX11(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2190

llvm::AMDGPU::isInlinableLiteralV216

bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)

Definition:AMDGPUBaseInfo.cpp:2774

llvm::AMDGPU::getMIMGInfo

LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)

llvm::AMDGPU::getRegOperandSize

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)

Get size of register operand.

Definition:AMDGPUBaseInfo.cpp:2592

llvm::AMDGPU::decodeWaitcnt

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)

Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...

Definition:AMDGPUBaseInfo.cpp:1472

llvm::AMDGPU::isInlinableLiteralFP16

bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)

Definition:AMDGPUBaseInfo.cpp:2663

llvm::AMDGPU::isSGPR

bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)

Is Reg a scalar register?

Definition:AMDGPUBaseInfo.cpp:2281

llvm::AMDGPU::convertSMRDOffsetUnits

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)

Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.

Definition:AMDGPUBaseInfo.cpp:2898

llvm::AMDGPU::encodeStorecnt

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)

Definition:AMDGPUBaseInfo.cpp:1561

llvm::AMDGPU::getMCReg

MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)

If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.

Definition:AMDGPUBaseInfo.cpp:2349

llvm::AMDGPU::hasSMEMByteOffset

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)

Definition:AMDGPUBaseInfo.cpp:2870

llvm::AMDGPU::isVOPCAsmOnly

bool isVOPCAsmOnly(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:526

llvm::AMDGPU::getMIMGOpcode

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)

Definition:AMDGPUBaseInfo.cpp:273

llvm::AMDGPU::getMTBUFHasSrsrc

bool getMTBUFHasSrsrc(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:452

llvm::AMDGPU::getSMRDEncodedLiteralOffset32

std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)

Definition:AMDGPUBaseInfo.cpp:2936

llvm::AMDGPU::isSymbolicCustomOperandEncoding

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1614

llvm::AMDGPU::isGFX10Before1030

bool isGFX10Before1030(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2222

llvm::AMDGPU::isSISrcInlinableOperand

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)

Does this operand support only inlinable literals?

Definition:AMDGPUBaseInfo.cpp:2436

llvm::AMDGPU::mapWMMA2AddrTo3AddrOpcode

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:671

llvm::AMDGPU::OPR_ID_UNSUPPORTED

const int OPR_ID_UNSUPPORTED

Definition:AMDGPUAsmUtils.h:24

llvm::AMDGPU::shouldEmitConstantsToTextSection

bool shouldEmitConstantsToTextSection(const Triple &TT)

Definition:AMDGPUBaseInfo.cpp:1327

llvm::AMDGPU::isInlinableLiteralV2I16

bool isInlinableLiteralV2I16(uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2794

llvm::AMDGPU::getMTBUFElements

int getMTBUFElements(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:442

llvm::AMDGPU::isHi16Reg

bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)

Definition:AMDGPUBaseInfo.cpp:2288

llvm::AMDGPU::encodeCustomOperandVal

static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)

Definition:AMDGPUBaseInfo.cpp:1650

llvm::AMDGPU::getTotalNumVGPRs

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)

Definition:AMDGPUBaseInfo.cpp:2274

llvm::AMDGPU::isGFX10

bool isGFX10(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2186

llvm::AMDGPU::getNamedOperandIdx

LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)

llvm::AMDGPU::isInlinableLiteralV2BF16

bool isInlinableLiteralV2BF16(uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2799

llvm::AMDGPU::getMaxNumUserSGPRs

unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2146

llvm::AMDGPU::getInlineEncodingV216

std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2680

llvm::AMDGPU::getFPDstSelType

FPType getFPDstSelType(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:659

llvm::AMDGPU::getNumFlatOffsetBits

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)

For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.

Definition:AMDGPUBaseInfo.cpp:2946

llvm::AMDGPU::AMDHSA_COV4

@ AMDHSA_COV4

Definition:AMDGPUBaseInfo.h:56

llvm::AMDGPU::AMDHSA_COV5

@ AMDHSA_COV5

Definition:AMDGPUBaseInfo.h:56

llvm::AMDGPU::AMDHSA_COV6

@ AMDHSA_COV6

Definition:AMDGPUBaseInfo.h:56

llvm::AMDGPU::hasA16

bool hasA16(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2118

llvm::AMDGPU::isLegalSMRDEncodedSignedOffset

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)

Definition:AMDGPUBaseInfo.cpp:2883

llvm::AMDGPU::isGFX12Plus

bool isGFX12Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2210

llvm::AMDGPU::getNSAMaxSize

unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)

Definition:AMDGPUBaseInfo.cpp:2135

llvm::AMDGPU::getCanBeVOPD

CanBeVOPD getCanBeVOPD(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:570

llvm::AMDGPU::hasPackedD16

bool hasPackedD16(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2126

llvm::AMDGPU::getStorecntBitMask

unsigned getStorecntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1438

llvm::AMDGPU::getLdsDwGranularity

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)

Definition:AMDGPUBaseInfo.cpp:3024

llvm::AMDGPU::isGFX940

bool isGFX940(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2250

llvm::AMDGPU::isEntryFunctionCC

bool isEntryFunctionCC(CallingConv::ID CC)

Definition:AMDGPUBaseInfo.cpp:2066

llvm::AMDGPU::isInlinableLiteralV2F16

bool isInlinableLiteralV2F16(uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2804

llvm::AMDGPU::isHsaAbi

bool isHsaAbi(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:168

llvm::AMDGPU::isGFX11

bool isGFX11(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2198

llvm::AMDGPU::OPR_VAL_INVALID

const int OPR_VAL_INVALID

Definition:AMDGPUAsmUtils.h:26

llvm::AMDGPU::getSMEMIsBuffer

bool getSMEMIsBuffer(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:502

llvm::AMDGPU::isGFX10_3_GFX11

bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2242

llvm::AMDGPU::mfmaScaleF8F6F4FormatToNumRegs

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)

Definition:AMDGPUBaseInfo.cpp:538

llvm::AMDGPU::isGroupSegment

bool isGroupSegment(const GlobalValue *GV)

Definition:AMDGPUBaseInfo.cpp:1313

llvm::AMDGPU::getIsaVersion

IsaVersion getIsaVersion(StringRef GPU)

Definition:TargetParser.cpp:229

llvm::AMDGPU::getMTBUFHasSoffset

bool getMTBUFHasSoffset(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:457

llvm::AMDGPU::hasXNACK

bool hasXNACK(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2106

llvm::AMDGPU::isValid32BitLiteral

bool isValid32BitLiteral(uint64_t Val, bool IsFP64)

Definition:AMDGPUBaseInfo.cpp:2808

llvm::AMDGPU::getCombinedCountBitMask

static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)

Definition:AMDGPUBaseInfo.cpp:1521

llvm::AMDGPU::getVOPDOpcode

unsigned getVOPDOpcode(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:577

llvm::AMDGPU::isDPALU_DPP

bool isDPALU_DPP(const MCInstrDesc &OpDesc)

Definition:AMDGPUBaseInfo.cpp:3020

llvm::AMDGPU::encodeWaitcnt

unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)

Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.

Definition:AMDGPUBaseInfo.cpp:1508

llvm::AMDGPU::isVOPC64DPP

bool isVOPC64DPP(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:522

llvm::AMDGPU::getMUBUFOpcode

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)

Definition:AMDGPUBaseInfo.cpp:467

llvm::AMDGPU::isCompute

bool isCompute(CallingConv::ID cc)

Definition:AMDGPUBaseInfo.cpp:2062

llvm::AMDGPU::getMAIIsGFX940XDL

bool getMAIIsGFX940XDL(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:533

llvm::AMDGPU::isSI

bool isSI(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2148

llvm::AMDGPU::getDefaultAMDHSACodeObjectVersion

unsigned getDefaultAMDHSACodeObjectVersion()

Definition:AMDGPUBaseInfo.cpp:181

llvm::AMDGPU::isReadOnlySegment

bool isReadOnlySegment(const GlobalValue *GV)

Definition:AMDGPUBaseInfo.cpp:1321

llvm::AMDGPU::isArgPassedInSGPR

bool isArgPassedInSGPR(const Argument *A)

Definition:AMDGPUBaseInfo.cpp:2815

llvm::AMDGPU::isIntrinsicAlwaysUniform

bool isIntrinsicAlwaysUniform(unsigned IntrID)

Definition:AMDGPUBaseInfo.cpp:2981

llvm::AMDGPU::getMUBUFBaseOpcode

int getMUBUFBaseOpcode(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:462

llvm::AMDGPU::getAMDHSACodeObjectVersion

unsigned getAMDHSACodeObjectVersion(const Module &M)

Definition:AMDGPUBaseInfo.cpp:172

llvm::AMDGPU::decodeLgkmcnt

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)

Definition:AMDGPUBaseInfo.cpp:1467

llvm::AMDGPU::getWaitcntBitMask

unsigned getWaitcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1442

llvm::AMDGPU::getVOP3IsSingle

bool getVOP3IsSingle(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:517

llvm::AMDGPU::isGFX9

bool isGFX9(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2160

llvm::AMDGPU::getVOP1IsSingle

bool getVOP1IsSingle(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:507

llvm::AMDGPU::isDwordAligned

static bool isDwordAligned(uint64_t ByteOffset)

Definition:AMDGPUBaseInfo.cpp:2894

llvm::AMDGPU::getVOPDEncodingFamily

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)

Definition:AMDGPUBaseInfo.cpp:562

llvm::AMDGPU::isGFX10_AEncoding

bool isGFX10_AEncoding(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2230

llvm::AMDGPU::isKImmOperand

bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)

Is this a KImm operand?

Definition:AMDGPUBaseInfo.cpp:2403

llvm::AMDGPU::getHasColorExport

bool getHasColorExport(const Function &F)

Definition:AMDGPUBaseInfo.cpp:2030

llvm::AMDGPU::getMTBUFBaseOpcode

int getMTBUFBaseOpcode(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:432

llvm::AMDGPU::isChainCC

bool isChainCC(CallingConv::ID CC)

Definition:AMDGPUBaseInfo.cpp:2092

llvm::AMDGPU::isGFX90A

bool isGFX90A(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2246

llvm::AMDGPU::getSamplecntBitMask

unsigned getSamplecntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1414

llvm::AMDGPU::getDefaultQueueImplicitArgPosition

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)

Definition:AMDGPUBaseInfo.cpp:240

llvm::AMDGPU::hasSRAMECC

bool hasSRAMECC(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2110

llvm::AMDGPU::getHasDepthExport

bool getHasDepthExport(const Function &F)

Definition:AMDGPUBaseInfo.cpp:2037

llvm::AMDGPU::isGFX8_GFX9_GFX10

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2172

llvm::AMDGPU::getMUBUFHasVAddr

bool getMUBUFHasVAddr(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:477

llvm::AMDGPU::getVOPDFull

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)

Definition:AMDGPUBaseInfo.cpp:688

llvm::AMDGPU::isTrue16Inst

bool isTrue16Inst(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:654

llvm::AMDGPU::hasAny64BitVGPROperands

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)

Definition:AMDGPUBaseInfo.cpp:3005

llvm::AMDGPU::getVOPDComponents

std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)

Definition:AMDGPUBaseInfo.cpp:694

llvm::AMDGPU::isInlinableLiteral32

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)

Definition:AMDGPUBaseInfo.cpp:2616

llvm::AMDGPU::isGFX12

bool isGFX12(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2206

llvm::AMDGPU::getInitialPSInputAddr

unsigned getInitialPSInputAddr(const Function &F)

Definition:AMDGPUBaseInfo.cpp:2026

llvm::AMDGPU::encodeExpcnt

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)

Definition:AMDGPUBaseInfo.cpp:1496

llvm::AMDGPU::isSISrcOperand

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)

Is this an AMDGPU specific source operand? These include registers, inline constants,...

Definition:AMDGPUBaseInfo.cpp:2396

llvm::AMDGPU::getKmcntBitMask

unsigned getKmcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1434

llvm::AMDGPU::getVmcntBitMask

unsigned getVmcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1404

llvm::AMDGPU::FPType

FPType

Definition:AMDGPUBaseInfo.h:58

llvm::AMDGPU::FPType::FP4

@ FP4

llvm::AMDGPU::FPType::None

@ None

llvm::AMDGPU::FPType::FP8

@ FP8

llvm::AMDGPU::isNotGFX10Plus

bool isNotGFX10Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2218

llvm::AMDGPU::hasMAIInsts

bool hasMAIInsts(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2258

llvm::AMDGPU::isIntrinsicSourceOfDivergence

bool isIntrinsicSourceOfDivergence(unsigned IntrID)

Definition:AMDGPUBaseInfo.cpp:2977

llvm::AMDGPU::isKernelCC

bool isKernelCC(const Function *Func)

Definition:AMDGPUBaseInfo.cpp:2102

llvm::AMDGPU::isGenericAtomic

bool isGenericAtomic(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:634

llvm::AMDGPU::decodeStorecntDscnt

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)

Definition:AMDGPUBaseInfo.cpp:1545

llvm::AMDGPU::isGFX8Plus

bool isGFX8Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2176

llvm::AMDGPU::hasNamedOperand

LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)

Definition:AMDGPUBaseInfo.h:400

llvm::AMDGPU::isInlinableIntLiteral

LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)

Is this literal inlinable, and not one of the values intended for floating point values.

Definition:AMDGPUBaseInfo.h:1458

llvm::AMDGPU::getLgkmcntBitMask

unsigned getLgkmcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1426

llvm::AMDGPU::getMUBUFTfe

bool getMUBUFTfe(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:497

llvm::AMDGPU::getBvhcntBitMask

unsigned getBvhcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1418

llvm::AMDGPU::hasSMRDSignedImmOffset

bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)

Definition:AMDGPUBaseInfo.cpp:163

llvm::AMDGPU::hasMIMG_R128

bool hasMIMG_R128(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2114

llvm::AMDGPU::hasGFX10_3Insts

bool hasGFX10_3Insts(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2238

llvm::AMDGPU::hasG16

bool hasG16(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2122

llvm::AMDGPU::getAddrSizeMIMGOp

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)

Definition:AMDGPUBaseInfo.cpp:293

llvm::AMDGPU::getMTBUFOpcode

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)

Definition:AMDGPUBaseInfo.cpp:437

llvm::AMDGPU::getExpcntBitMask

unsigned getExpcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1422

llvm::AMDGPU::hasArchitectedFlatScratch

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2254

llvm::AMDGPU::getMUBUFHasSoffset

bool getMUBUFHasSoffset(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:487

llvm::AMDGPU::isNotGFX11Plus

bool isNotGFX11Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2214

llvm::AMDGPU::isGFX11Plus

bool isGFX11Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2202

llvm::AMDGPU::getInlineEncodingV2F16

std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2769

llvm::AMDGPU::isInlineValue

bool isInlineValue(unsigned Reg)

Definition:AMDGPUBaseInfo.cpp:2367

llvm::AMDGPU::isSISrcFPOperand

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)

Is this a floating-point operand?

Definition:AMDGPUBaseInfo.cpp:2410

llvm::AMDGPU::isShader

bool isShader(CallingConv::ID cc)

Definition:AMDGPUBaseInfo.cpp:2041

llvm::AMDGPU::getHostcallImplicitArgPosition

unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)

Definition:AMDGPUBaseInfo.cpp:229

llvm::AMDGPU::getDefaultCustomOperandEncoding

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1602

llvm::AMDGPU::encodeLoadcnt

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)

Definition:AMDGPUBaseInfo.cpp:1555

llvm::AMDGPU::isGFX10Plus

bool isGFX10Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2194

llvm::AMDGPU::decodeCustomOperand

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1633

llvm::AMDGPU::getSMRDEncodedOffset

std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)

Definition:AMDGPUBaseInfo.cpp:2907

llvm::AMDGPU::isGlobalSegment

bool isGlobalSegment(const GlobalValue *GV)

Definition:AMDGPUBaseInfo.cpp:1317

llvm::AMDGPU::OPERAND_KIMM_LAST

@ OPERAND_KIMM_LAST

Definition:SIDefines.h:269

llvm::AMDGPU::OPERAND_KIMM32

@ OPERAND_KIMM32

Operand with 32-bit immediate that uses the constant bus.

Definition:SIDefines.h:234

llvm::AMDGPU::OPERAND_REG_INLINE_C_LAST

@ OPERAND_REG_INLINE_C_LAST

Definition:SIDefines.h:260

llvm::AMDGPU::OPERAND_REG_IMM_V2FP16

@ OPERAND_REG_IMM_V2FP16

Definition:SIDefines.h:211

llvm::AMDGPU::OPERAND_REG_INLINE_C_FP64

@ OPERAND_REG_INLINE_C_FP64

Definition:SIDefines.h:223

llvm::AMDGPU::OPERAND_REG_INLINE_C_V2BF16

@ OPERAND_REG_INLINE_C_V2BF16

Definition:SIDefines.h:225

llvm::AMDGPU::OPERAND_REG_IMM_V2INT16

@ OPERAND_REG_IMM_V2INT16

Definition:SIDefines.h:212

llvm::AMDGPU::OPERAND_REG_INLINE_AC_V2FP16

@ OPERAND_REG_INLINE_AC_V2FP16

Definition:SIDefines.h:246

llvm::AMDGPU::OPERAND_SRC_FIRST

@ OPERAND_SRC_FIRST

Definition:SIDefines.h:265

llvm::AMDGPU::OPERAND_REG_IMM_V2BF16

@ OPERAND_REG_IMM_V2BF16

Definition:SIDefines.h:210

llvm::AMDGPU::OPERAND_REG_INLINE_AC_FIRST

@ OPERAND_REG_INLINE_AC_FIRST

Definition:SIDefines.h:262

llvm::AMDGPU::OPERAND_KIMM_FIRST

@ OPERAND_KIMM_FIRST

Definition:SIDefines.h:268

llvm::AMDGPU::OPERAND_REG_IMM_FP16

@ OPERAND_REG_IMM_FP16

Definition:SIDefines.h:206

llvm::AMDGPU::OPERAND_REG_IMM_FP64

@ OPERAND_REG_IMM_FP64

Definition:SIDefines.h:204

llvm::AMDGPU::OPERAND_REG_INLINE_C_V2FP16

@ OPERAND_REG_INLINE_C_V2FP16

Definition:SIDefines.h:226

llvm::AMDGPU::OPERAND_REG_INLINE_AC_V2INT16

@ OPERAND_REG_INLINE_AC_V2INT16

Definition:SIDefines.h:244

llvm::AMDGPU::OPERAND_REG_INLINE_AC_FP16

@ OPERAND_REG_INLINE_AC_FP16

Definition:SIDefines.h:241

llvm::AMDGPU::OPERAND_REG_INLINE_AC_FP32

@ OPERAND_REG_INLINE_AC_FP32

Definition:SIDefines.h:242

llvm::AMDGPU::OPERAND_REG_INLINE_AC_V2BF16

@ OPERAND_REG_INLINE_AC_V2BF16

Definition:SIDefines.h:245

llvm::AMDGPU::OPERAND_REG_IMM_FP32

@ OPERAND_REG_IMM_FP32

Definition:SIDefines.h:203

llvm::AMDGPU::OPERAND_REG_INLINE_C_FIRST

@ OPERAND_REG_INLINE_C_FIRST

Definition:SIDefines.h:259

llvm::AMDGPU::OPERAND_REG_INLINE_C_FP32

@ OPERAND_REG_INLINE_C_FP32

Definition:SIDefines.h:222

llvm::AMDGPU::OPERAND_REG_INLINE_AC_LAST

@ OPERAND_REG_INLINE_AC_LAST

Definition:SIDefines.h:263

llvm::AMDGPU::OPERAND_REG_INLINE_C_V2INT16

@ OPERAND_REG_INLINE_C_V2INT16

Definition:SIDefines.h:224

llvm::AMDGPU::OPERAND_REG_IMM_V2FP32

@ OPERAND_REG_IMM_V2FP32

Definition:SIDefines.h:214

llvm::AMDGPU::OPERAND_REG_INLINE_AC_FP64

@ OPERAND_REG_INLINE_AC_FP64

Definition:SIDefines.h:243

llvm::AMDGPU::OPERAND_REG_INLINE_C_FP16

@ OPERAND_REG_INLINE_C_FP16

Definition:SIDefines.h:221

llvm::AMDGPU::OPERAND_REG_INLINE_C_V2FP32

@ OPERAND_REG_INLINE_C_V2FP32

Definition:SIDefines.h:228

llvm::AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED

@ OPERAND_REG_IMM_FP32_DEFERRED

Definition:SIDefines.h:209

llvm::AMDGPU::OPERAND_SRC_LAST

@ OPERAND_SRC_LAST

Definition:SIDefines.h:266

llvm::AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED

@ OPERAND_REG_IMM_FP16_DEFERRED

Definition:SIDefines.h:208

llvm::AMDGPU::initDefaultAMDKernelCodeT

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)

Definition:AMDGPUBaseInfo.cpp:1279

llvm::AMDGPU::isNotGFX9Plus

bool isNotGFX9Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2184

llvm::AMDGPU::hasGDS

bool hasGDS(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2131

llvm::AMDGPU::isLegalSMRDEncodedUnsignedOffset

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)

Definition:AMDGPUBaseInfo.cpp:2874

llvm::AMDGPU::isGFX9Plus

bool isGFX9Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2180

llvm::AMDGPU::hasDPPSrc1SGPR

bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2266

llvm::AMDGPU::OPR_ID_DUPLICATE

const int OPR_ID_DUPLICATE

Definition:AMDGPUAsmUtils.h:25

llvm::AMDGPU::isVOPD

bool isVOPD(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:582

llvm::AMDGPU::getVOPDInstInfo

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)

Definition:AMDGPUBaseInfo.cpp:790

llvm::AMDGPU::encodeVmcnt

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)

Definition:AMDGPUBaseInfo.cpp:1487

llvm::AMDGPU::decodeExpcnt

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)

Definition:AMDGPUBaseInfo.cpp:1462

llvm::AMDGPU::isCvt_F32_Fp8_Bf8_e64

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:621

llvm::AMDGPU::decodeLoadcntDscnt

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)

Definition:AMDGPUBaseInfo.cpp:1535

llvm::AMDGPU::getInlineEncodingV2I16

std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2735

llvm::AMDGPU::getRegBitWidth

unsigned getRegBitWidth(const TargetRegisterClass &RC)

Get the size in bits of a register from the register class RC.

Definition:SIRegisterInfo.cpp:3201

llvm::AMDGPU::encodeStorecntDscnt

static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)

Definition:AMDGPUBaseInfo.cpp:1585

llvm::AMDGPU::getMCOpcode

int getMCOpcode(uint16_t Opcode, unsigned Gen)

Definition:AMDGPUBaseInfo.cpp:684

llvm::AMDGPU::getMIMGBaseOpcode

const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:280

llvm::AMDGPU::isVI

bool isVI(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2156

llvm::AMDGPU::getMUBUFIsBufferInv

bool getMUBUFIsBufferInv(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:492

llvm::AMDGPU::mc2PseudoReg

MCRegister mc2PseudoReg(MCRegister Reg)

Convert hardware register Reg to a pseudo register.

Definition:AMDGPUBaseInfo.cpp:2365

llvm::AMDGPU::getInlineEncodingV2BF16

std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)

Definition:AMDGPUBaseInfo.cpp:2741

llvm::AMDGPU::encodeCustomOperand

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:1657

llvm::AMDGPU::hasKernargPreload

unsigned hasKernargPreload(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2270

llvm::AMDGPU::isMAC

bool isMAC(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:586

llvm::AMDGPU::isCI

bool isCI(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2152

llvm::AMDGPU::encodeLgkmcnt

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)

Definition:AMDGPUBaseInfo.cpp:1502

llvm::AMDGPU::getVOP2IsSingle

bool getVOP2IsSingle(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:512

llvm::AMDGPU::getMAIIsDGEMM

bool getMAIIsDGEMM(unsigned Opc)

Returns true if MAI operation is a double precision GEMM.

Definition:AMDGPUBaseInfo.cpp:528

llvm::AMDGPU::getMIMGBaseOpcodeInfo

LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)

llvm::AMDGPU::OPR_ID_UNKNOWN

const int OPR_ID_UNKNOWN

Definition:AMDGPUAsmUtils.h:23

llvm::AMDGPU::getCompletionActionImplicitArgPosition

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)

Definition:AMDGPUBaseInfo.cpp:251

llvm::AMDGPU::getIntegerVecAttribute

SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)

Definition:AMDGPUBaseInfo.cpp:1367

llvm::AMDGPU::getMaskedMIMGOp

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)

Definition:AMDGPUBaseInfo.cpp:285

llvm::AMDGPU::isModuleEntryFunctionCC

bool isModuleEntryFunctionCC(CallingConv::ID CC)

Definition:AMDGPUBaseInfo.cpp:2083

llvm::AMDGPU::isNotGFX12Plus

bool isNotGFX12Plus(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2212

llvm::AMDGPU::getMTBUFHasVAddr

bool getMTBUFHasVAddr(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:447

llvm::AMDGPU::decodeVmcnt

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)

Definition:AMDGPUBaseInfo.cpp:1454

llvm::AMDGPU::getELFABIVersion

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)

Definition:AMDGPUBaseInfo.cpp:198

llvm::AMDGPU::getIntegerPairAttribute

std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)

Definition:AMDGPUBaseInfo.cpp:1332

llvm::AMDGPU::getLoadcntBitMask

unsigned getLoadcntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1410

llvm::AMDGPU::isInlinableLiteralI16

bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)

Definition:AMDGPUBaseInfo.cpp:2659

llvm::AMDGPU::hasVOPD

bool hasVOPD(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2262

llvm::AMDGPU::encodeDscnt

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)

Definition:AMDGPUBaseInfo.cpp:1567

llvm::AMDGPU::isInlinableLiteral64

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)

Is this literal inlinable?

Definition:AMDGPUBaseInfo.cpp:2599

llvm::AMDGPU::getMFMA_F8F6F4_WithFormatArgs

const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)

Definition:AMDGPUBaseInfo.cpp:554

llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)

Definition:AMDGPUBaseInfo.cpp:215

llvm::AMDGPU::isGFX9_GFX10_GFX11

bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2168

llvm::AMDGPU::isGFX9_GFX10

bool isGFX9_GFX10(const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2164

llvm::AMDGPU::getMUBUFElements

int getMUBUFElements(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:472

llvm::AMDGPU::encodeLoadcntDscnt

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)

Definition:AMDGPUBaseInfo.cpp:1573

llvm::AMDGPU::getGcnBufferFormatInfo

const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)

Definition:AMDGPUBaseInfo.cpp:2985

llvm::AMDGPU::isGraphics

bool isGraphics(CallingConv::ID cc)

Definition:AMDGPUBaseInfo.cpp:2058

llvm::AMDGPU::mapWMMA3AddrTo2AddrOpcode

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:676

llvm::AMDGPU::isPermlane16

bool isPermlane16(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:610

llvm::AMDGPU::getMUBUFHasSrsrc

bool getMUBUFHasSrsrc(unsigned Opc)

Definition:AMDGPUBaseInfo.cpp:482

llvm::AMDGPU::getDscntBitMask

unsigned getDscntBitMask(const IsaVersion &Version)

Definition:AMDGPUBaseInfo.cpp:1430

llvm::BitmaskEnumDetail::Mask

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

Definition:BitmaskEnum.h:125

llvm::CallingConv::AMDGPU_CS

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

Definition:CallingConv.h:197

llvm::CallingConv::AMDGPU_VS

@ AMDGPU_VS

Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).

Definition:CallingConv.h:188

llvm::CallingConv::AMDGPU_KERNEL

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

Definition:CallingConv.h:200

llvm::CallingConv::AMDGPU_Gfx

@ AMDGPU_Gfx

Used for AMD graphics targets.

Definition:CallingConv.h:232

llvm::CallingConv::AMDGPU_CS_ChainPreserve

@ AMDGPU_CS_ChainPreserve

Used on AMDGPUs to give the middle-end more control over argument placement.

Definition:CallingConv.h:249

llvm::CallingConv::AMDGPU_HS

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

Definition:CallingConv.h:206

llvm::CallingConv::AMDGPU_GS

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

Definition:CallingConv.h:191

llvm::CallingConv::AMDGPU_CS_Chain

@ AMDGPU_CS_Chain

Used on AMDGPUs to give the middle-end more control over argument placement.

Definition:CallingConv.h:245

llvm::CallingConv::AMDGPU_PS

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

Definition:CallingConv.h:194

llvm::CallingConv::SPIR_KERNEL

@ SPIR_KERNEL

Used for SPIR kernel functions.

Definition:CallingConv.h:144

llvm::CallingConv::AMDGPU_ES

@ AMDGPU_ES

Used for AMDPAL shader stage before geometry shader if geometry is in use.

Definition:CallingConv.h:218

llvm::CallingConv::AMDGPU_LS

@ AMDGPU_LS

Used for AMDPAL vertex shader if tessellation is in use.

Definition:CallingConv.h:213

llvm::CallingConv::ID

unsigned ID

LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

Definition:CallingConv.h:24

llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V4

@ ELFABIVERSION_AMDGPU_HSA_V4

Definition:ELF.h:381

llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5

@ ELFABIVERSION_AMDGPU_HSA_V5

Definition:ELF.h:382

llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V6

@ ELFABIVERSION_AMDGPU_HSA_V6

Definition:ELF.h:383

llvm::MCOI::TIED_TO

@ TIED_TO

Definition:MCInstrDesc.h:36

llvm::SIEncodingFamily::GFX11

@ GFX11

Definition:SIDefines.h:46

llvm::SIEncodingFamily::GFX12

@ GFX12

Definition:SIDefines.h:47

llvm::cl::Hidden

@ Hidden

Definition:CommandLine.h:137

llvm::cl::init

initializer< Ty > init(const Ty &Val)

Definition:CommandLine.h:443

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::alignDown

constexpr T alignDown(U Value, V Align, W Skew=0)

Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.

Definition:MathExtras.h:556

llvm::report_fatal_error

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

Definition:Error.cpp:167

llvm::errs

raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

Definition:raw_ostream.cpp:907

llvm::divideCeil

constexpr T divideCeil(U Numerator, V Denominator)

Returns the integer ceil(Numerator / Denominator).

Definition:MathExtras.h:404

llvm::alignTo

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

Definition:Alignment.h:155

llvm::ReplacementType::Format

@ Format

llvm::ReplacementType::Literal

@ Literal

llvm::operator<<

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

Definition:APFixedPoint.h:303

llvm::InstructionUniformity::AlwaysUniform

@ AlwaysUniform

The result values are always uniform.

llvm::InstructionUniformity::Default

@ Default

The result values are uniform if and only if all operands are uniform.

llvm::Version

@ Version

Definition:PGOCtxProfWriter.h:22

#define N

amd_kernel_code_t

AMD Kernel Code Object (amd_kernel_code_t).

Definition:AMDKernelCodeT.h:526

llvm::AMDGPU::AMDGPUMCKernelCodeT

Definition:AMDKernelCodeTUtils.h:33

llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_version_major

uint16_t amd_machine_version_major

Definition:AMDKernelCodeTUtils.h:41

llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_kind

uint16_t amd_machine_kind

Definition:AMDKernelCodeTUtils.h:40

llvm::AMDGPU::AMDGPUMCKernelCodeT::wavefront_size

uint8_t wavefront_size

Definition:AMDKernelCodeTUtils.h:63

llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_version_stepping

uint16_t amd_machine_version_stepping

Definition:AMDKernelCodeTUtils.h:43

llvm::AMDGPU::AMDGPUMCKernelCodeT::private_segment_alignment

uint8_t private_segment_alignment

Definition:AMDKernelCodeTUtils.h:62

llvm::AMDGPU::AMDGPUMCKernelCodeT::kernel_code_entry_byte_offset

int64_t kernel_code_entry_byte_offset

Definition:AMDKernelCodeTUtils.h:44

llvm::AMDGPU::AMDGPUMCKernelCodeT::code_properties

uint32_t code_properties

Definition:AMDKernelCodeTUtils.h:49

llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_kernel_code_version_major

uint32_t amd_kernel_code_version_major

Definition:AMDKernelCodeTUtils.h:38

llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_machine_version_minor

uint16_t amd_machine_version_minor

Definition:AMDKernelCodeTUtils.h:42

llvm::AMDGPU::AMDGPUMCKernelCodeT::group_segment_alignment

uint8_t group_segment_alignment

Definition:AMDKernelCodeTUtils.h:61

llvm::AMDGPU::AMDGPUMCKernelCodeT::kernarg_segment_alignment

uint8_t kernarg_segment_alignment

Definition:AMDKernelCodeTUtils.h:60

llvm::AMDGPU::AMDGPUMCKernelCodeT::amd_kernel_code_version_minor

uint32_t amd_kernel_code_version_minor

Definition:AMDKernelCodeTUtils.h:39

llvm::AMDGPU::AMDGPUMCKernelCodeT::call_convention

int32_t call_convention

Definition:AMDKernelCodeTUtils.h:64

llvm::AMDGPU::AMDGPUMCKernelCodeT::compute_pgm_resource_registers

uint64_t compute_pgm_resource_registers

Definition:AMDKernelCodeTUtils.h:48

llvm::AMDGPU::CanBeVOPD

Definition:AMDGPUBaseInfo.h:588

llvm::AMDGPU::CustomOperandVal

Definition:AMDGPUAsmUtils.h:34

llvm::AMDGPU::DPMACCInstructionInfo

Definition:AMDGPUBaseInfo.cpp:384

llvm::AMDGPU::DPMACCInstructionInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:385

llvm::AMDGPU::DPMACCInstructionInfo::IsDPMACCInstruction

bool IsDPMACCInstruction

Definition:AMDGPUBaseInfo.cpp:386

llvm::AMDGPU::Exp::ExpTgt

Definition:AMDGPUBaseInfo.cpp:1754

llvm::AMDGPU::Exp::ExpTgt::Tgt

unsigned Tgt

Definition:AMDGPUBaseInfo.cpp:1756

llvm::AMDGPU::Exp::ExpTgt::Name

StringLiteral Name

Definition:AMDGPUBaseInfo.cpp:1755

llvm::AMDGPU::Exp::ExpTgt::MaxIndex

unsigned MaxIndex

Definition:AMDGPUBaseInfo.cpp:1757

llvm::AMDGPU::FP4FP8DstByteSelInfo

Definition:AMDGPUBaseInfo.cpp:389

llvm::AMDGPU::FP4FP8DstByteSelInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:390

llvm::AMDGPU::FP4FP8DstByteSelInfo::HasFP8DstByteSel

bool HasFP8DstByteSel

Definition:AMDGPUBaseInfo.cpp:391

llvm::AMDGPU::FP4FP8DstByteSelInfo::HasFP4DstByteSel

bool HasFP4DstByteSel

Definition:AMDGPUBaseInfo.cpp:392

llvm::AMDGPU::GcnBufferFormatInfo

Definition:AMDGPUBaseInfo.h:87

llvm::AMDGPU::IsaVersion

Instruction set architecture version.

Definition:TargetParser.h:130

llvm::AMDGPU::MAIInstInfo

Definition:AMDGPUBaseInfo.h:95

llvm::AMDGPU::MFMA_F8F6F4_Info

Definition:AMDGPUBaseInfo.h:101

llvm::AMDGPU::MIMGBaseOpcodeInfo

Definition:AMDGPUBaseInfo.h:407

llvm::AMDGPU::MIMGBaseOpcodeInfo::Gradients

bool Gradients

Definition:AMDGPUBaseInfo.h:416

llvm::AMDGPU::MIMGBaseOpcodeInfo::G16

bool G16

Definition:AMDGPUBaseInfo.h:417

llvm::AMDGPU::MIMGBaseOpcodeInfo::LodOrClampOrMip

bool LodOrClampOrMip

Definition:AMDGPUBaseInfo.h:419

llvm::AMDGPU::MIMGBaseOpcodeInfo::Coordinates

bool Coordinates

Definition:AMDGPUBaseInfo.h:418

llvm::AMDGPU::MIMGBaseOpcodeInfo::NumExtraArgs

uint8_t NumExtraArgs

Definition:AMDGPUBaseInfo.h:415

llvm::AMDGPU::MIMGDimInfo

Definition:AMDGPUBaseInfo.h:433

llvm::AMDGPU::MIMGDimInfo::NumCoords

uint8_t NumCoords

Definition:AMDGPUBaseInfo.h:435

llvm::AMDGPU::MIMGDimInfo::NumGradients

uint8_t NumGradients

Definition:AMDGPUBaseInfo.h:436

llvm::AMDGPU::MIMGInfo

Definition:AMDGPUBaseInfo.h:509

llvm::AMDGPU::MIMGInfo::BaseOpcode

uint16_t BaseOpcode

Definition:AMDGPUBaseInfo.h:511

llvm::AMDGPU::MIMGInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.h:510

llvm::AMDGPU::MIMGInfo::VAddrDwords

uint8_t VAddrDwords

Definition:AMDGPUBaseInfo.h:514

llvm::AMDGPU::MIMGInfo::MIMGEncoding

uint8_t MIMGEncoding

Definition:AMDGPUBaseInfo.h:512

llvm::AMDGPU::MTBUFInfo

Definition:AMDGPUBaseInfo.cpp:332

llvm::AMDGPU::MTBUFInfo::BaseOpcode

uint16_t BaseOpcode

Definition:AMDGPUBaseInfo.cpp:334

llvm::AMDGPU::MTBUFInfo::has_srsrc

bool has_srsrc

Definition:AMDGPUBaseInfo.cpp:337

llvm::AMDGPU::MTBUFInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:333

llvm::AMDGPU::MTBUFInfo::has_vaddr

bool has_vaddr

Definition:AMDGPUBaseInfo.cpp:336

llvm::AMDGPU::MTBUFInfo::elements

uint8_t elements

Definition:AMDGPUBaseInfo.cpp:335

llvm::AMDGPU::MTBUFInfo::has_soffset

bool has_soffset

Definition:AMDGPUBaseInfo.cpp:338

llvm::AMDGPU::MUBUFInfo

Definition:AMDGPUBaseInfo.cpp:321

llvm::AMDGPU::MUBUFInfo::IsBufferInv

bool IsBufferInv

Definition:AMDGPUBaseInfo.cpp:328

llvm::AMDGPU::MUBUFInfo::has_srsrc

bool has_srsrc

Definition:AMDGPUBaseInfo.cpp:326

llvm::AMDGPU::MUBUFInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:322

llvm::AMDGPU::MUBUFInfo::elements

uint8_t elements

Definition:AMDGPUBaseInfo.cpp:324

llvm::AMDGPU::MUBUFInfo::tfe

bool tfe

Definition:AMDGPUBaseInfo.cpp:329

llvm::AMDGPU::MUBUFInfo::has_soffset

bool has_soffset

Definition:AMDGPUBaseInfo.cpp:327

llvm::AMDGPU::MUBUFInfo::has_vaddr

bool has_vaddr

Definition:AMDGPUBaseInfo.cpp:325

llvm::AMDGPU::MUBUFInfo::BaseOpcode

uint16_t BaseOpcode

Definition:AMDGPUBaseInfo.cpp:323

llvm::AMDGPU::SMInfo

Definition:AMDGPUBaseInfo.cpp:341

llvm::AMDGPU::SMInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:342

llvm::AMDGPU::SMInfo::IsBuffer

bool IsBuffer

Definition:AMDGPUBaseInfo.cpp:343

llvm::AMDGPU::VOP3CDPPAsmOnlyInfo

Definition:AMDGPUBaseInfo.cpp:359

llvm::AMDGPU::VOP3CDPPAsmOnlyInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:360

llvm::AMDGPU::VOPC64DPPInfo

Definition:AMDGPUBaseInfo.cpp:351

llvm::AMDGPU::VOPC64DPPInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:352

llvm::AMDGPU::VOPCDPPAsmOnlyInfo

Definition:AMDGPUBaseInfo.cpp:355

llvm::AMDGPU::VOPCDPPAsmOnlyInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:356

llvm::AMDGPU::VOPDComponentInfo

Definition:AMDGPUBaseInfo.cpp:363

llvm::AMDGPU::VOPDComponentInfo::VOPDOp

uint16_t VOPDOp

Definition:AMDGPUBaseInfo.cpp:365

llvm::AMDGPU::VOPDComponentInfo::BaseVOP

uint16_t BaseVOP

Definition:AMDGPUBaseInfo.cpp:364

llvm::AMDGPU::VOPDComponentInfo::CanBeVOPDX

bool CanBeVOPDX

Definition:AMDGPUBaseInfo.cpp:366

llvm::AMDGPU::VOPDInfo

Definition:AMDGPUBaseInfo.cpp:369

llvm::AMDGPU::VOPDInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:370

llvm::AMDGPU::VOPDInfo::OpX

uint16_t OpX

Definition:AMDGPUBaseInfo.cpp:371

llvm::AMDGPU::VOPDInfo::Subtarget

uint16_t Subtarget

Definition:AMDGPUBaseInfo.cpp:373

llvm::AMDGPU::VOPDInfo::OpY

uint16_t OpY

Definition:AMDGPUBaseInfo.cpp:372

llvm::AMDGPU::VOPInfo

Definition:AMDGPUBaseInfo.cpp:346

llvm::AMDGPU::VOPInfo::IsSingle

bool IsSingle

Definition:AMDGPUBaseInfo.cpp:348

llvm::AMDGPU::VOPInfo::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:347

llvm::AMDGPU::VOPTrue16Info

Definition:AMDGPUBaseInfo.cpp:376

llvm::AMDGPU::VOPTrue16Info::IsTrue16

bool IsTrue16

Definition:AMDGPUBaseInfo.cpp:378

llvm::AMDGPU::VOPTrue16Info::Opcode

uint16_t Opcode

Definition:AMDGPUBaseInfo.cpp:377

llvm::AMDGPU::WMMAOpcodeMappingInfo

Definition:AMDGPUBaseInfo.h:480

llvm::AMDGPU::Waitcnt

Represents the counter values to wait for in an s_waitcnt instruction.

Definition:AMDGPUBaseInfo.h:967

llvm::AMDGPU::Waitcnt::ExpCnt

unsigned ExpCnt

Definition:AMDGPUBaseInfo.h:969

llvm::AMDGPU::Waitcnt::LoadCnt

unsigned LoadCnt

Definition:AMDGPUBaseInfo.h:968

llvm::AMDGPU::Waitcnt::StoreCnt

unsigned StoreCnt

Definition:AMDGPUBaseInfo.h:971

llvm::AMDGPU::Waitcnt::DsCnt

unsigned DsCnt

Definition:AMDGPUBaseInfo.h:970

llvm::DWARFExpression::Operation::Description

Description of the encoding of one expression Op.

Definition:DWARFExpression.h:66

llvm::cl::desc

Definition:CommandLine.h:409