//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
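// A minimal illustrative sketch (not part of the original file): how register
// syntax typically maps onto these kinds. The helper name is hypothetical.
//   static RegisterKind classifyRegPrefix(char C) {
//     switch (C) {
//     case 'v': return IS_VGPR;    // e.g. "v0", "v[0:3]"
//     case 's': return IS_SGPR;    // e.g. "s2", "s[4:5]"
//     case 'a': return IS_AGPR;    // e.g. "a1"
//     default:  return IS_UNKNOWN; // "ttmp0" -> IS_TTMP, "vcc" -> IS_SPECIAL
//     }
//   }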
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}
  using Ptr = std::unique_ptr<AMDGPUOperand>;

  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

  int64_t getFPModifiersOperand() const { ... }

  int64_t getIntModifiersOperand() const { ... }

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers()) &&
           "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers())
      return getFPModifiersOperand();
    if (hasIntModifiers())
      return getIntModifiersOperand();
    return 0;
  }
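  // Illustrative sketch, not from the original file: the two elided helpers
  // above are expected to pack the flags into the source-modifier operand
  // bitmask. The SISrcMods names are an assumption used for illustration only.
  //   int64_t Operand = 0;
  //   if (Abs)  Operand |= SISrcMods::ABS;   // FP modifiers
  //   if (Neg)  Operand |= SISrcMods::NEG;
  //   if (Sext) Operand |= SISrcMods::SEXT;  // int modifier
  //   return Operand;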
  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
    ImmKindTyMandatoryLiteral,

  mutable ImmKindTy Kind;
  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }
  void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }
  void setImmKindMandatoryLiteral() const {
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }
  void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const { ... }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImm(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const { ... }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImm(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const { ... }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const { ... }

  bool isRegOrImmWithFP16InputMods() const { ... }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImm(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const { ... }

  bool isRegOrImmWithFP64InputMods() const { ... }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const { ... }

  bool isPackedFP32InputMods() const { ... }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const { ... }
  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
  bool isRegOrImm() const { ... }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const { ... }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const { ... }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const { ... }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const { ... }
  bool isSSrcV2FP32() const { ... }
  bool isSCSrcV2FP32() const { ... }
  bool isSSrcV2INT32() const { ... }
  bool isSCSrcV2INT32() const { ... }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { ... }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const { ... }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
  bool isVISrcB32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); }
  bool isVISrcB16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); }
  bool isVISrcV2B16() const { ... }
  bool isVISrcF32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); }
  bool isVISrcF16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); }
  bool isVISrcV2F16() const { return isVISrcF16() || isVISrcB32(); }

  bool isVISrc_64_bf16() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16); }
  bool isVISrc_64_f16() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16); }
  bool isVISrc_64_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }
  bool isVISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); }
  bool isVISrc_64_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); }
  bool isVISrc_64V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); }
  bool isVISrc_64V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }

  bool isVISrc_256_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }
  bool isVISrc_256_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }
  bool isVISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); }
  bool isVISrc_256_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); }

  bool isVISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); }
  bool isVISrc_128V2B16() const { return isVISrc_128B16(); }
  bool isVISrc_128_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); }
  bool isVISrc_128_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); }

  bool isVISrc_256V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }
  bool isVISrc_256V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }

  bool isVISrc_512_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); }
  bool isVISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); }
  bool isVISrc_512V2B16() const { return isVISrc_512B16(); }
  bool isVISrc_512_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); }
  bool isVISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); }
  bool isVISrc_512V2F16() const { return isVISrc_512F16() || isVISrc_512_b32(); }

  bool isVISrc_1024_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); }
  bool isVISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); }
  bool isVISrc_1024V2B16() const { return isVISrc_1024B16(); }
  bool isVISrc_1024_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); }
  bool isVISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); }
  bool isVISrc_1024V2F16() const { return isVISrc_1024F16() || isVISrc_1024_b32(); }

  bool isAISrcB32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); }
  bool isAISrcB16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); }
  bool isAISrcV2B16() const { ... }
  bool isAISrcF32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); }
  bool isAISrcF16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); }
  bool isAISrcV2F16() const { return isAISrcF16() || isAISrcB32(); }

  bool isAISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); }
  bool isAISrc_64_f64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); }

  bool isAISrc_128_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); }
  bool isAISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); }
  bool isAISrc_128V2B16() const { return isAISrc_128B16(); }
  bool isAISrc_128_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); }
  bool isAISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); }
  bool isAISrc_128V2F16() const { return isAISrc_128F16() || isAISrc_128_b32(); }

  bool isVISrc_128_bf16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16); }
  bool isVISrc_128_f16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); }
  bool isVISrc_128V2F16() const { return isVISrc_128_f16() || isVISrc_128_b32(); }

  bool isAISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); }
  bool isAISrc_256_f64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); }

  bool isAISrc_512_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); }
  bool isAISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); }
  bool isAISrc_512V2B16() const { return isAISrc_512B16(); }
  bool isAISrc_512_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); }
  bool isAISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); }
  bool isAISrc_512V2F16() const { return isAISrc_512F16() || isAISrc_512_b32(); }

  bool isAISrc_1024_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); }
  bool isAISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); }
  bool isAISrc_1024V2B16() const { return isAISrc_1024B16(); }
  bool isAISrc_1024_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); }
  bool isAISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); }
  bool isAISrc_1024V2F16() const { return isAISrc_1024F16() || isAISrc_1024_b32(); }
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override { ... }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isSDelayALU() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;

    return [=]() { return P(*this); };
  int64_t getImm() const { ... }

  void setImm(int64_t Val) { ... }

  ImmTy getImmTy() const { ... }

  SMLoc getStartLoc() const override { ... }

  SMLoc getEndLoc() const override { ... }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    ...
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  void addImmOperands(MCInst &Inst, unsigned N,
                      bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val,
                            bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    ...
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    ...
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
       << " mods: " << Reg.Mods << '>';

    OS << '<' << getImm();
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    }
    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';

    OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    ...
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    ...
    Op->Imm.Mods = Modifiers();
    ...
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    ...
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, ...) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    ...
    Op->Reg.Mods = Modifiers();
    ...
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, ...) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    ...
    return Op;
  }

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      ...
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      ...(..., VgprIndexUnusedMin);
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    ...
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      ...
      // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
      ...(..., VgprIndexUnusedMin);
    }
  }

  KernelScopeInfo() = default;

  usesSgprAt(SgprIndexUnusedMin = -1);
  usesVgprAt(VgprIndexUnusedMin = -1);
  usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    }
  }
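// Illustrative example (not from the original source) of the per-kernel scope
// described above; register usage is tracked separately for each kernel:
//   .amdgpu_hsa_kernel kernel_a
//   kernel_a:
//     v_mov_b32 v3, 0            // kernel_a's scope sees VGPRs up to v3
//   .amdgpu_hsa_kernel kernel_b  // starts a fresh scope for kernel_b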
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);

  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  ...(..., const MCExpr *FlatScrUsed, bool XNACKUsed,
      std::optional<bool> EnableWavefrontSize32, ...);

  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth, ...);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth, ...);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, ...);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, ...);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum, ...);
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum, ...);

  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, ...);

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  if (getFeatureBits().none()) {
    // Set default features.
    ...
  }
  if (!FB[AMDGPU::FeatureWavefrontSize64] &&
      !FB[AMDGPU::FeatureWavefrontSize32]) {
    // If there is no default wave size it must be a generation before gfx10,
    // these have FeatureWavefrontSize64 in their definition already. For
    // gfx10+ set wave32 as a default.
    ...
  }

  createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
  createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
  createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

  createConstantSymbol(".option.machine_version_major", ISA.Major);
  createConstantSymbol(".option.machine_version_minor", ISA.Minor);
  createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

  initializeGprCountSymbol(IS_VGPR);
  initializeGprCountSymbol(IS_SGPR);

  createConstantSymbol(Symbol, Code);
  createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
  createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
  createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  // TODO: isGFX90A is also true for GFX940. We need to clean it.

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const { ... }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  // We need this const_cast because for some reason getContext() is not const

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  ...(..., bool RestoreOnFailure);
  ...(..., SMLoc &EndLoc) override;
  ...(..., unsigned Kind) override;
  ...(..., bool MatchingInlineAsm) override;
  ...(..., OperandMode Mode = OperandMode_Default);
  //bool ProcessInstruction(MCInst &Inst);

  ...(..., AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      std::function<bool(int64_t &)> ConvertResult = nullptr);
  ...(..., AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);
  ...(..., AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  ...(..., AMDGPUOperand::ImmTy Type);

  bool isRegOrOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token,
                               const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ...(..., bool AllowImm = true);
  ...(..., bool AllowImm = true);
  ...(..., AMDGPUOperand::ImmTy ImmTy);

  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr,
                     SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    bool IsDefined = false;

    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      ...
        return Error(Parser, "not supported on this GPU");
      ...
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
    }
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg, const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ...(..., OperandInfoTy &Width);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
                      ...) const;
  ...(..., bool SearchMandatoryLiterals = false) const;
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst, ...);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);

  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  AsmToken peekToken(bool ShouldSkipSpace = true);

  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg, ...);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal, const unsigned MaxVal, ...);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);

  int64_t parseGPRIdxMacro();

  ...(..., OptionalImmIndexMap &OptionalIdx);
  ...(..., OptionalImmIndexMap &OptionalIdx);
  ...(..., OptionalImmIndexMap &OptionalIdx);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  ...(..., AMDGPUOperand::ImmTy Type);
  ...(..., bool SkipDstVcc = false, bool SkipSrcVcc = false);
1925}
// end anonymous namespace 1927// May be called with integer type with equivalent bitwidth. 1931return &APFloat::IEEEsingle();
1933return &APFloat::IEEEdouble();
1935return &APFloat::IEEEhalf();
1947// When floating-point immediate is used as operand of type i16, the 32-bit 1948// representation of the constant truncated to the 16 LSBs should be used. 1968return &APFloat::IEEEsingle();
1974return &APFloat::IEEEdouble();
1983return &APFloat::IEEEhalf();
1991return &APFloat::BFloat();
1997//===----------------------------------------------------------------------===// 1999//===----------------------------------------------------------------------===// 2004// Convert literal to single precision 2006 APFloat::rmNearestTiesToEven,
2008// We allow precision lost but not overflow or underflow 2009if (
Status != APFloat::opOK &&
2011 ((
Status & APFloat::opOverflow) != 0 ||
2012 (
Status & APFloat::opUnderflow) != 0)) {
2035bool AMDGPUOperand::isInlinableImm(
MVT type)
const{
2037// This is a hack to enable named inline values like 2038// shared_base with both 32-bit and 64-bit operands. 2039// Note that these values are defined as 2040// 32-bit operands only. 2045if (!isImmTy(ImmTyNone)) {
2046// Only plain immediates are inlinable (e.g. "clamp" attribute is not) 2049// TODO: We should avoid using host float here. It would be better to 2050// check the float bit values which is what a few other places do. 2051// We've had bot failures before due to weird NaN support on mips hosts. 2055if (
Imm.IsFPImm) {
// We got fp literal token 2056if (type == MVT::f64 || type == MVT::i64) {
// Expected 64-bit operand 2058 AsmParser->hasInv2PiInlineImm());
2080 APFloat::rmNearestTiesToEven, &Lost);
2083// We need to use 32-bit representation here because when a floating-point 2084// inline constant is used as an i16 operand, its 32-bit representation 2085// representation will be used. We will need the 32-bit value to check if 2086// it is FP inline constant. 2087uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2089 AsmParser->hasInv2PiInlineImm());
2092// Check if single precision literal is inlinable 2094static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2095 AsmParser->hasInv2PiInlineImm());
2098// We got int literal token. 2099if (type == MVT::f64 || type == MVT::i64) {
// Expected 64-bit operand 2101 AsmParser->hasInv2PiInlineImm());
2110static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2111 type, AsmParser->hasInv2PiInlineImm());
2115static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2116 AsmParser->hasInv2PiInlineImm());
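// Illustrative note (not from the original source): an operand is "inlinable"
// when it is one of the values the hardware encodes directly in the
// instruction word (small integers -16..64, +-0.5, +-1.0, +-2.0, +-4.0, and
// 1/(2*pi) on targets with FeatureInv2PiInlineImm); any other constant must be
// emitted as a separate literal dword.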
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    ...
  }

  // We got int literal token.
  if (type == MVT::f64 && hasFPModifiers()) {
    // Cannot apply fp modifiers to int literals preserving the same semantics
    // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
    // disable these cases.
    ...
  }

  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept these
    // literals
    ...
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    ...
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  //
  // For i16x2 operands, we assume that the specified literal is encoded as a
  // single-precision float. This is pretty odd, but it matches SP3 and what
  // happens in hardware.
  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                                            : type;
  ...
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() &&
         AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
const{
2177return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2178// GFX90A allows DPP on 64-bit operands. 2179 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2180 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2183template <
bool IsFake16>
2184bool AMDGPUOperand::isT16_Lo128VRegWithInputMods()
const{
2185return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2186 : AMDGPU::VGPR_16_Lo128RegClassID);
2189template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const{
2190return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2191 : AMDGPU::VGPR_16RegClassID);
2194bool AMDGPUOperand::isSDWAOperand(
MVT type)
const{
2195if (AsmParser->isVI())
2197if (AsmParser->isGFX9Plus())
2198return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2202bool AMDGPUOperand::isSDWAFP16Operand()
const{
2203return isSDWAOperand(MVT::f16);
2206bool AMDGPUOperand::isSDWAFP32Operand()
const{
2207return isSDWAOperand(MVT::f32);
2210bool AMDGPUOperand::isSDWAInt16Operand()
const{
2211return isSDWAOperand(MVT::i16);
2214bool AMDGPUOperand::isSDWAInt32Operand()
const{
2215return isSDWAOperand(MVT::i32);
2218bool AMDGPUOperand::isBoolReg()
const{
2219auto FB = AsmParser->getFeatureBits();
2220returnisReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2221 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2226assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
  ...
  addLiteralImmOperand(Inst, Imm.Val,
                       ...
                       isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  ...
  assert(!isImmTy(ImmTyNone) || !hasModifiers());
  ...
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  // Check that this operand accepts literals
  ...
  if (ApplyModifiers) {
    ...
    Val = applyInputFPModifiers(Val, Size);
  }

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    ...(..., AsmParser->hasInv2PiInlineImm())) {
    ...
    // For fp operands we check if low 32 bits are zeros
    if (Literal.getLoBits(32) != 0) {
      const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
          Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
      Val &= 0xffffffff00000000u;
    }
    ...
    setImmKindLiteral();
    ...
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them. This case should be checked earlier
    // in predicate methods (isLiteralImm())
    ...
    if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
      // This is the 1/(2*pi) which is going to be truncated to bf16 with the
      // loss of precision. The constant represents the idiomatic fp32 value of
      // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
      // bits. Prevent rounding below.
      ...
      setImmKindLiteral();
      ...
    }
    ...
    // Convert literal to single precision
    ...(..., APFloat::rmNearestTiesToEven, &lost);
    // We allow precision lost but not overflow or underflow. This should be
    // checked earlier in isLiteralImm()
    ...
    uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
    ...
    setImmKindMandatoryLiteral();
    ...
    setImmKindLiteral();
    ...
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  ...(..., AsmParser->hasInv2PiInlineImm())) {
  ...
  setImmKindLiteral();
  ...
  setImmKindLiteral();
  ...
  setImmKindLiteral();
  ...(..., AsmParser->hasInv2PiInlineImm())) {
  ...
  setImmKindLiteral();
  ...(..., AsmParser->hasInv2PiInlineImm())) {
  ...
  setImmKindLiteral();
  ...(..., AsmParser->hasInv2PiInlineImm()));
  ...(..., AsmParser->hasInv2PiInlineImm()));
  ...
  setImmKindMandatoryLiteral();
  ...
  setImmKindMandatoryLiteral();
  ...
}
MCInst &Inst,
unsignedN)
const{
2515bool AMDGPUOperand::isInlineValue()
const{
2519//===----------------------------------------------------------------------===// 2521//===----------------------------------------------------------------------===// 2523void AMDGPUAsmParser::createConstantSymbol(
StringRef Id, int64_t Val) {
2524// TODO: make those pre-defined variables read-only. 2525// Currently there is none suitable machinery in the core llvm-mc for this. 2526// MCSymbol::isRedefinable is intended for another purpose, and 2527// AsmParser::parseDirectiveSet() cannot be specialized for specific target. 2538return AMDGPU::VGPR_32RegClassID;
2540return AMDGPU::VReg_64RegClassID;
2542return AMDGPU::VReg_96RegClassID;
2544return AMDGPU::VReg_128RegClassID;
2546return AMDGPU::VReg_160RegClassID;
2548return AMDGPU::VReg_192RegClassID;
2550return AMDGPU::VReg_224RegClassID;
2552return AMDGPU::VReg_256RegClassID;
2554return AMDGPU::VReg_288RegClassID;
2556return AMDGPU::VReg_320RegClassID;
2558return AMDGPU::VReg_352RegClassID;
2560return AMDGPU::VReg_384RegClassID;
2562return AMDGPU::VReg_512RegClassID;
2564return AMDGPU::VReg_1024RegClassID;
2566 }
elseif (Is == IS_TTMP) {
2570return AMDGPU::TTMP_32RegClassID;
2572return AMDGPU::TTMP_64RegClassID;
2574return AMDGPU::TTMP_128RegClassID;
2576return AMDGPU::TTMP_256RegClassID;
2578return AMDGPU::TTMP_512RegClassID;
2580 }
elseif (Is == IS_SGPR) {
2584return AMDGPU::SGPR_32RegClassID;
2586return AMDGPU::SGPR_64RegClassID;
2588return AMDGPU::SGPR_96RegClassID;
2590return AMDGPU::SGPR_128RegClassID;
2592return AMDGPU::SGPR_160RegClassID;
2594return AMDGPU::SGPR_192RegClassID;
2596return AMDGPU::SGPR_224RegClassID;
2598return AMDGPU::SGPR_256RegClassID;
2600return AMDGPU::SGPR_288RegClassID;
2602return AMDGPU::SGPR_320RegClassID;
2604return AMDGPU::SGPR_352RegClassID;
2606return AMDGPU::SGPR_384RegClassID;
2608return AMDGPU::SGPR_512RegClassID;
2610 }
elseif (Is == IS_AGPR) {
2614return AMDGPU::AGPR_32RegClassID;
2616return AMDGPU::AReg_64RegClassID;
2618return AMDGPU::AReg_96RegClassID;
2620return AMDGPU::AReg_128RegClassID;
2622return AMDGPU::AReg_160RegClassID;
2624return AMDGPU::AReg_192RegClassID;
2626return AMDGPU::AReg_224RegClassID;
2628return AMDGPU::AReg_256RegClassID;
2630return AMDGPU::AReg_288RegClassID;
2632return AMDGPU::AReg_320RegClassID;
2634return AMDGPU::AReg_352RegClassID;
2636return AMDGPU::AReg_384RegClassID;
2638return AMDGPU::AReg_512RegClassID;
2640return AMDGPU::AReg_1024RegClassID;
2648 .
Case(
"exec", AMDGPU::EXEC)
2649 .
Case(
"vcc", AMDGPU::VCC)
2650 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2651 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2652 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2653 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2654 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2655 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2656 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2657 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2658 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2659 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2660 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2661 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2662 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2663 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2664 .
Case(
"m0", AMDGPU::M0)
2665 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2666 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2667 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2668 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2669 .
Case(
"scc", AMDGPU::SRC_SCC)
2670 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2671 .
Case(
"tba", AMDGPU::TBA)
2672 .
Case(
"tma", AMDGPU::TMA)
2673 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2674 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2675 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2676 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2677 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2678 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2679 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2680 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2681 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2682 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2683 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2684 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2685 .
Case(
"pc", AMDGPU::PC_REG)
2686 .
Case(
"null", AMDGPU::SGPR_NULL)
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  ...
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  ...
}

  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);

  bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  ...

bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, ...) {
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
    ...
  }
  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;
    ...
  }
  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;
    ...
  }
  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
    ...
  }
  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
    ...
  }
  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    ...
  }
  Error(Loc, "register does not fit in the list");
  ...
  if (Reg1 != Reg + RegWidth / 32) {
    Error(Loc, "registers in a list must have consecutive indices");
  }
  ...
}

  {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||
  ...
  if (Str.starts_with(Reg.Name))
  ...
  return !Str.getAsInteger(10, Num);

bool AMDGPUAsmParser::isRegister(const AsmToken &Token, ...) const {
  // A list of consecutive registers: [s0,s1,s2,s3]
  ...
  // A single register like s0 or a range of registers like s[0:1]
  ...
  if (!RegSuffix.empty()) {
    // A single register with an index: rXX
    ...
    // A range of registers: r[XX:YY].
    ...
  }
  ...
}

bool AMDGPUAsmParser::isRegister() {
  return isRegister(getToken(), peekToken());
}

MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,
                                          ...) {
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    ...
  }
  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
  }

  unsigned RegIdx = RegNum / AlignSize;
  ...
    Error(Loc, "invalid or unsupported register size");
  ...
    Error(Loc, "register index is out of range");
  ...
  // Currently all regular registers have their .l and .h subregisters, so
  // we should never need to generate an error here.
  assert(Reg && "Invalid subregister!");
  ...
}

bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;
  ...
  SMLoc FirstIdxLoc = getLoc();
  ...
    SecondIdxLoc = getLoc();
  ...
  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
  }
  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
  }
  ...
    Error(FirstIdxLoc, "first register index should not exceed second index");
  ...
  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
  ...
}
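  // Illustrative example (not from the original source): for "s[2:5]" the
  // parser reads RegLo = 2 and RegHi = 5, so Num = 2 and
  // RegWidth = 32 * ((5 - 2) + 1) = 128 bits, i.e. a 4-dword SGPR tuple.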
MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, ...) {
  ...
  RegKind = IS_SPECIAL;
  ...
  lex(); // skip register name
  ...
}

MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, ...) {
  ...
    Error(Loc, "invalid register name");
  ...
  lex(); // skip register name

  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
    ...
    // Single 32-bit register: vXX.
    ...
      Error(Loc, "invalid register index");
    ...
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      ...
  }
  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}

MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
                                         ...) {
  auto ListLoc = getLoc();
  ...("expected a register or a list of registers")) {

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    ...
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
  }

  RegisterKind NextRegKind;
  unsigned NextRegNum, NextRegWidth;
  if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, ...))
    ...
  if (NextRegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
  }
  if (NextRegKind != RegKind) {
    Error(Loc, "registers in a list must be of the same kind");
  }
  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
    ...
  ...("expected a comma or a closing square bracket")) {

  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
  ...
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, ...) {
  ...
  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
  ...
  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  ...
  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  ...
  assert(Parser.hasPendingError());
  ...
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    }
    ...(" register not available on this GPU");
  }
  ...
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, ...,
                                          bool RestoreOnFailure /*=false*/) {
  ...
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        ...
      }
    }
  }
  ...
}

std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  ...
    return StringRef(".amdgcn.next_free_vgpr");
  ...
    return StringRef(".amdgcn.next_free_sgpr");
  ...
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  ...
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex, ...) {
  // Symbols are only defined for GCN targets
  ...
  auto SymbolName = getGprCountSymbolName(RegKind);
  ...
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  ...
  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    ...(".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  if (OldCount <= NewMax)
    ...
}

std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    ...
  }
  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
    ...
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
3161bool HasSP3AbsModifier,
bool HasLit) {
3162// TODO: add syntactic sugar for 1/(2*PI) 3169 HasLit = trySkipId(
"lit");
3181constauto& Tok = getToken();
3182constauto& NextTok = peekToken();
3193 AMDGPUOperand::Modifiers Mods;
3197// Floating-point expressions are not supported. 3198// Can only allow floating-point literals with an 3204APFloat RealVal(APFloat::IEEEdouble());
3205auto roundMode = APFloat::rmNearestTiesToEven;
3206if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3209 RealVal.changeSign();
3212 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3213 AMDGPUOperand::ImmTyNone,
true));
3214 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3215Op.setModifiers(Mods);
  if (HasSP3AbsModifier) {
    // This is a workaround for handling expressions
    // as arguments of SP3 'abs' modifier, for example:
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').
    if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
  } else {
    if (Parser.parseExpression(Expr))
  }

  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  if (auto R = parseRegister()) {

ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
                                           bool HasSP3AbsMod, bool HasLit) {
  return parseImm(Operands, HasSP3AbsMod, HasLit);
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
bool
AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3, for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");

  Lit = trySkipId("lit");
    return Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs, Lit);

  if (Lit && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");
  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers() || Lit) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  bool Sext = trySkipId("sext");

  AMDGPUOperand::Modifiers Mods;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);

  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();

unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }
  return Match_Success;
static const unsigned Variants[] = {

  // What asm variants we should check
  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
  if (getForcedEncodingSize() == 32)

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  return AMDGPU::NoRegister;
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  int64_t Val = MO.getImm();
  switch (OpSize) { // expected operand size

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
// Get regular operand indices in the same order as specified
// in the instruction (but append mandatory literals to the end).
                                       bool AddMandatoryLiterals = false) {
  int16_t ImmDeferredIdx =

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
    return !isInlineConstant(Inst, OpIdx);
  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;

// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
// Writelane is special in that it can use SGPR and M0 (which would normally
// count as using the constant bus twice - but in this case it is allowed since
// the lane selector doesn't count as a use of the constant bus). However, it is
// still required to abide by the 1 SGPR rule.
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
  if (!LaneSelOp.isReg())
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
bool AMDGPUAsmParser::validateConstantBusLimitations(
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // Check special imm operands (used by madmk, etc)
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  for (int OpIdx : OpIndices) {
    if (usesConstantBus(Inst, OpIdx)) {
      // Pairs of registers with a partial intersections like these
      //   flat_scratch_lo, flat_scratch
      //   flat_scratch_lo, flat_scratch_hi
      // are theoretically valid but they are disabled anyway.
      // Note that this code mimics SIInstrInfo::verifyInstruction
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
    } else {
      // Expression or a literal
        continue; // special operand like VINTERP attr_chan

      // An instruction may use only one literal.
      // This has been validated on the previous step.
      // See validateVOPLiteral.
      // This literal may be used as more than one operand.
      // If all these operands are of the same size,
      // this literal counts as one scalar value.
      // Otherwise it counts as 2 scalar values.
      // See "GFX10 Shader Programming", section 3.6.2.3.
      if (NumLiterals == 0) {
      } else if (LiteralSize != Size) {

  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))

  Error(Loc, "invalid operand (violates constant bus restrictions)");
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {

  // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;

  auto InvalidCompOprIdx =
      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
  if (!InvalidCompOprIdx)

  auto CompOprIdx = *InvalidCompOprIdx;
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
          " operands must use different VGPR banks");
  }
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample

  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;

  bool IsPackedD16 = false;
    IsPackedD16 = D16Idx >= 0;
      DataSize = (DataSize + 1) / 2;

  if ((VDataSize / 4) == DataSize + TFESize)

  Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                        : AMDGPU::OpName::rsrc;
  assert(SrsrcIdx > VAddr0Idx);

  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
    Error(IDLoc, "image address size does not match a16");
  }

  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx

  unsigned ExpectedAddrSize =

  if (hasPartialNSAEncoding() &&
    int VAddrLastIdx = SrsrcIdx - 1;
    unsigned VAddrLastSize =
    ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;

  if (ExpectedAddrSize > 12)
    ExpectedAddrSize = 16;

  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
  // This provides backward compatibility for assembly created
  // before 160b/192b/224b types were directly supported.
  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))

  if (ActualAddrSize == ExpectedAddrSize)

  Error(IDLoc, "image address size does not match dim and a16");
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
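// Illustrative sketch (not part of the validator): the GATHER4 rule above is
// simply "exactly one of the four channel bits is set". The helper is
// hypothetical.
static bool exampleIsValidGather4DMask(unsigned DMask) {
  return DMask != 0 && DMask <= 0xf && (DMask & (DMask - 1)) == 0;
}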
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
  // image_bvh_intersect_ray instructions do not have dim
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  if (!BaseOpcode->MSAA)
  return DimInfo->MSAA;

  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
// movrels* opcodes should only allow VGPRS as src0.
// This is specified in .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
  Error(ErrLoc, "source operand must be a VGPR");

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
        "source operand must be either a VGPR or an inline constant");

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
        "inline constants are not allowed for this operand");

bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
  // Validate the correct register size was used for the floating point
        "wrong register tuple size for cbsz value " + Twine(CBSZ));
        "wrong register tuple size for blgp value " + Twine(BLGP));

  if (Src2Reg == DstReg)

  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
        "source 2 operand must not partially overlap with dst");
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:

  // TODO: Check that src0 = src1 or src2.
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  if ((Desc.TSFlags & Enc) == 0)

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      return StringRef("lds_direct is not supported on this GPU");
      return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();

bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
    return validateFlatOffset(Inst, Operands);
    return validateSMEMOffset(Inst, Operands);

  const unsigned OffsetSize = 24;
  if (!isIntN(OffsetSize, Op.getImm())) {
          Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");

  const unsigned OffsetSize = 16;
          Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
  if (!hasFlatOffsets() && Op.getImm() != 0) {
        "flat offset modifier is not supported on this GPU");

  // For pre-GFX12 FLAT instructions the offset must be positive;
  // MSB is ignored and forced to zero.
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
  // Start with second operand because SMEM Offset cannot be dst or src0.
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();

bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
            : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                                   : "expected a 21-bit signed offset");
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      if (NumLiterals == 0 || LiteralValue != Value) {

  return NumLiterals + NumExprs <= 1;
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  if (OpSelIdx != -1) {
  if (OpSelHiIdx != -1) {
  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).

bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
  // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
  // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
  // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
  // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.

  // Instructions that have a neg_lo or neg_hi operand allow the neg modifier
  // on some src operands but not on others.
  // It is convenient that such instructions don't have src_modifiers operand
  // for src operands that don't allow neg because they also don't allow opsel.

  int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers};

  for (unsigned i = 0; i < 3; ++i) {
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
  if (DppCtrlIdx >= 0) {
    // DP ALU DPP is supported for row_newbcast only on GFX9*
    Error(S, "DP ALU dpp only supports row_newbcast");

  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  Error(S, "invalid operand for instruction");
        "src1 immediate operand invalid for instruction");

// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
      !HasMandatoryLiteral && !isVOPD(Opcode))

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;

  for (int OpIdx : OpIndices) {
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
      if (IsFP64 && IsValid32Op)
      if (NumLiterals == 0 || LiteralValue != Value) {

  NumLiterals += NumExprs;

  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
  auto Reg = Sub ? Sub : Op.getReg();
  return AGPR32.contains(Reg) ? 1 : 0;

bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
                              : AMDGPU::OpName::vdata;

  if (Data2Areg >= 0 && Data2Areg != DataAreg)

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])

  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    return Op.getStartLoc();
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
  if (IsNeg == UsesNeg)
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
  if (Reg == AMDGPU::SGPR_NULL)
  Error(RegLoc, "src0 must be null");
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
  // Only validate GDS for non-GWS instructions.
  Error(S, "gds modifier is not supported on this GPU");

// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)

  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  Error(RegLoc, "vgpr must be even aligned");
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            AMDGPU::OpName::cpol);
    return validateTHAndScopeBits(Inst, Operands, CPol);
    Error(S, "cache policy is not supported for SMRD instructions");
    Error(IDLoc, "invalid cache policy for SMEM instruction");

  if (!(TSFlags & AllowSCCModifier)) {
        "scc modifier is not supported for this instruction on this GPU");
                 : "instruction must use glc");
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
                 : "instruction must not use glc");
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const unsigned CPol) {
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");
    return PrintError("invalid th value for SMEM instruction");
    return PrintError("scope and th combination is not valid");
    return PrintError("invalid th value for atomic instructions");
    return PrintError("invalid th value for store instructions");
    return PrintError("invalid th value for load instructions");
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
  if (Desc.mayStore() &&
    Error(Loc, "TFE modifier has no meaning for store instructions");
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
  if (auto ErrMsg = validateLdsDirect(Inst)) {
  if (!validateSOPLiteral(Inst)) {
          "only one unique literal operand is allowed");
  if (!validateVOPLiteral(Inst, Operands)) {
  if (!validateConstantBusLimitations(Inst, Operands)) {
  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
  if (!validateIntClampSupported(Inst)) {
          "integer clamping is not supported on this GPU");
  if (!validateOpSel(Inst)) {
          "invalid op_sel operand");
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
          "invalid neg_lo operand");
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
          "invalid neg_hi operand");

  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
          "d16 modifier is not supported on this GPU");
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
  if (!validateMIMGMSAA(Inst)) {
          "invalid dim; must be MSAA type");
  if (!validateMIMGDataSize(Inst, IDLoc)) {
  if (!validateMIMGAddrSize(Inst, IDLoc))
  if (!validateMIMGAtomicDMask(Inst)) {
          "invalid atomic image dmask");
  if (!validateMIMGGatherDMask(Inst)) {
          "invalid image_gather dmask: only one bit must be set");
  if (!validateMovrels(Inst, Operands)) {
  if (!validateOffset(Inst, Operands)) {
  if (!validateMAIAccWrite(Inst, Operands)) {
  if (!validateMAISrc2(Inst, Operands)) {
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
        ? "invalid register class: data and dst should be all VGPR or AGPR"
        : "invalid register class: agpr loads and stores not supported on this GPU"
  if (!validateVGPRAlign(Inst)) {
          "invalid register class: vgpr tuples must be 64 bit aligned");
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
  if (!validateWaitCnt(Inst, Operands)) {
                                 unsigned VariantID = 0);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
  for (auto Variant : Variants) {

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
        " variant of this instruction is not supported"));

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
        ComputeAvailableFeatures(FeaturesWS32);
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
    return Error(IDLoc, "instruction not supported on this GPU");

  // Instruction not supported on any GPU. Probably a typo.
  return Error(IDLoc, "invalid instruction" + Suggestion);
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              bool MatchingInlineAsm) {
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
    if (R == Match_Success)
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {

  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {

  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
      return Error(IDLoc, "too few operands for instruction");
    if (ErrorLoc == SMLoc())
    return Error(ErrorLoc, "invalid VOPDY instruction");
    return Error(ErrorLoc, "invalid operand for instruction");
  }
  case Match_MnemonicFail:
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  if (getParser().parseAbsoluteExpression(Tmp)) {

bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))

  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
bool AMDGPUAsmParser::calculateGPRBlocks(
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  int64_t EvaluatedSGPRs;

  unsigned MaxAddressableNumSGPRs =
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
      !Features.test(FeatureSGPRInitBug) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
      (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {

  VGPRBlocks = GetNumGPRBlocks(
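// Illustrative sketch (not part of the directive handler): the scalar form of
// the block-count formula quoted in the comment above, using plain unsigned
// arithmetic instead of MCExprs. The helper is hypothetical.
static unsigned exampleNumGprBlocks(unsigned NumGPR, unsigned Granule) {
  unsigned N = NumGPR ? NumGPR : 1u;                 // max(1u, NumGPR)
  unsigned Aligned = (N + Granule - 1) / Granule * Granule;
  return Aligned / Granule - 1;                      // e.g. 5 VGPRs, granule 4 -> 1
}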
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
    return TokError("directive only supported for amdgcn architecture");
    return TokError("directive only supported for amdhsa OS");

  if (getParser().parseIdentifier(KernelName))
                                    &getSTI(), getContext());

  const MCExpr *NextFreeVGPR = ZeroExpr;
  const MCExpr *NextFreeSGPR = ZeroExpr;

  // Count the number of user SGPRs implied from the enabled feature bits.
  unsigned ImpliedUserSGPRCount = 0;

  // Track if the asm explicitly contains the directive for the user SGPR
  std::optional<unsigned> ExplicitUserSGPRCount;
  const MCExpr *ReserveVCC = OneExpr;
  const MCExpr *ReserveFlatScr = OneExpr;
  std::optional<bool> EnableWavefrontSize32;

  SMRange IDRange = getTok().getLocRange();
  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))

  if (ID == ".end_amdhsa_kernel")

    return TokError(".amdhsa_ directives cannot be repeated");

  SMLoc ValStart = getLoc();
  if (getParser().parseExpression(ExprVal))
  SMLoc ValEnd = getLoc();

  bool EvaluatableExpr;
  if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
      return OutOfRangeError(ValRange);

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
    return OutOfRangeError(RANGE);                                             \
  AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \

// Some fields use the parsed value immediately which requires the expression to
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
    return Error(IDRange.Start, "directive should have resolvable expression", \
5587if (
ID ==
".amdhsa_group_segment_fixed_size") {
5590return OutOfRangeError(ValRange);
5592 }
elseif (
ID ==
".amdhsa_private_segment_fixed_size") {
5595return OutOfRangeError(ValRange);
5597 }
elseif (
ID ==
".amdhsa_kernarg_size") {
5599return OutOfRangeError(ValRange);
5601 }
elseif (
ID ==
".amdhsa_user_sgpr_count") {
5603 ExplicitUserSGPRCount = Val;
5604 }
elseif (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
5608"directive is not supported with architected flat scratch",
5611 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5614 ImpliedUserSGPRCount += 4;
5615 }
elseif (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
5618returnError(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5621return OutOfRangeError(ValRange);
5625 ImpliedUserSGPRCount += Val;
5626 PreloadLength = Val;
5628 }
elseif (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
5631returnError(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5634return OutOfRangeError(ValRange);
5638 PreloadOffset = Val;
5639 }
elseif (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
5642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5645 ImpliedUserSGPRCount += 2;
5646 }
elseif (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
5649 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5652 ImpliedUserSGPRCount += 2;
5653 }
elseif (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
5656 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5659 ImpliedUserSGPRCount += 2;
5660 }
elseif (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
5663 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5666 ImpliedUserSGPRCount += 2;
5667 }
elseif (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
5670"directive is not supported with architected flat scratch",
5674 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5677 ImpliedUserSGPRCount += 2;
5678 }
elseif (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
5681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5684 ImpliedUserSGPRCount += 1;
5685 }
elseif (
ID ==
".amdhsa_wavefront_size32") {
5687if (IVersion.
Major < 10)
5688returnError(IDRange.
Start,
"directive requires gfx10+", IDRange);
5689 EnableWavefrontSize32 = Val;
5691 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5693 }
elseif (
ID ==
".amdhsa_uses_dynamic_stack") {
5695 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5697 }
elseif (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5700"directive is not supported with architected flat scratch",
5703 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5705 }
elseif (
ID ==
".amdhsa_enable_private_segment") {
5709"directive is not supported without architected flat scratch",
5712 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5714 }
elseif (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
5716 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5718 }
elseif (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
5720 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5722 }
elseif (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
5724 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5726 }
elseif (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
5728 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5730 }
elseif (
ID ==
".amdhsa_system_vgpr_workitem_id") {
5732 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5734 }
elseif (
ID ==
".amdhsa_next_free_vgpr") {
5735 VGPRRange = ValRange;
5736 NextFreeVGPR = ExprVal;
5737 }
elseif (
ID ==
".amdhsa_next_free_sgpr") {
5738 SGPRRange = ValRange;
5739 NextFreeSGPR = ExprVal;
5740 }
elseif (
ID ==
".amdhsa_accum_offset") {
5742returnError(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5743 AccumOffset = ExprVal;
5744 }
elseif (
ID ==
".amdhsa_reserve_vcc") {
5745if (EvaluatableExpr && !isUInt<1>(Val))
5746return OutOfRangeError(ValRange);
5747 ReserveVCC = ExprVal;
5748 }
elseif (
ID ==
".amdhsa_reserve_flat_scratch") {
5749if (IVersion.
Major < 7)
5750returnError(IDRange.
Start,
"directive requires gfx7+", IDRange);
5753"directive is not supported with architected flat scratch",
5755if (EvaluatableExpr && !isUInt<1>(Val))
5756return OutOfRangeError(ValRange);
5757 ReserveFlatScr = ExprVal;
5758 }
elseif (
ID ==
".amdhsa_reserve_xnack_mask") {
5759if (IVersion.
Major < 8)
5760returnError(IDRange.
Start,
"directive requires gfx8+", IDRange);
5762return OutOfRangeError(ValRange);
5763if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5764return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
5766 }
elseif (
ID ==
".amdhsa_float_round_mode_32") {
5768 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5770 }
elseif (
ID ==
".amdhsa_float_round_mode_16_64") {
5772 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5774 }
elseif (
ID ==
".amdhsa_float_denorm_mode_32") {
5776 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5778 }
elseif (
ID ==
".amdhsa_float_denorm_mode_16_64") {
5780 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5782 }
elseif (
ID ==
".amdhsa_dx10_clamp") {
5783if (IVersion.
Major >= 12)
5784returnError(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5786 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5788 }
elseif (
ID ==
".amdhsa_ieee_mode") {
5789if (IVersion.
Major >= 12)
5790returnError(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5792 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5794 }
elseif (
ID ==
".amdhsa_fp16_overflow") {
5795if (IVersion.
Major < 9)
5796returnError(IDRange.
Start,
"directive requires gfx9+", IDRange);
5798 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5800 }
elseif (
ID ==
".amdhsa_tg_split") {
5802returnError(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5805 }
elseif (
ID ==
".amdhsa_workgroup_processor_mode") {
5806if (IVersion.
Major < 10)
5807returnError(IDRange.
Start,
"directive requires gfx10+", IDRange);
5809 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5811 }
elseif (
ID ==
".amdhsa_memory_ordered") {
5812if (IVersion.
Major < 10)
5813returnError(IDRange.
Start,
"directive requires gfx10+", IDRange);
5815 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5817 }
elseif (
ID ==
".amdhsa_forward_progress") {
5818if (IVersion.
Major < 10)
5819returnError(IDRange.
Start,
"directive requires gfx10+", IDRange);
5821 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5823 }
elseif (
ID ==
".amdhsa_shared_vgpr_count") {
5825if (IVersion.
Major < 10 || IVersion.
Major >= 12)
5826returnError(IDRange.
Start,
"directive requires gfx10 or gfx11",
5828 SharedVGPRCount = Val;
5830 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5832 }
elseif (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
5835 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5837 }
elseif (
ID ==
".amdhsa_exception_fp_denorm_src") {
5839 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5841 }
elseif (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
5844 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5846 }
elseif (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
5848 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5850 }
elseif (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
5852 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5854 }
elseif (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
5856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5858 }
elseif (
ID ==
".amdhsa_exception_int_div_zero") {
5860 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5862 }
elseif (
ID ==
".amdhsa_round_robin_scheduling") {
5863if (IVersion.
Major < 12)
5864returnError(IDRange.
Start,
"directive requires gfx12+", IDRange);
5866 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5869returnError(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
#undef PARSE_BITS_ENTRY

  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);

  // Consider the case where the total number of UserSGPRs with trailing
  // allocated preload SGPRs, is greater than the number of explicitly
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,

  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
      COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());

    return TokError("Kernarg size should be resolvable");

  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");
  if (!Seen.contains(".amdhsa_accum_offset"))
    return TokError(".amdhsa_accum_offset directive is required");

  int64_t EvaluatedAccum;
  bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
  uint64_t UEvaluatedAccum = EvaluatedAccum;
  if (AccumEvaluatable &&
      (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
    return TokError("accum_offset should be in range [4..256] in "

  int64_t EvaluatedNumVGPR;
  if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
    return TokError("accum_offset exceeds total VGPR allocation");
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }

    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
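// Illustrative sketch (not part of the directive handler): the gfx90a
// accum_offset constraints enforced above - a multiple of 4 in [4..256] that
// must not exceed the VGPR allocation. The comparison against NumVGPRs is a
// simplification used only for this example; the helper is hypothetical.
static bool exampleAccumOffsetValid(uint64_t AccumOffset, uint64_t NumVGPRs) {
  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
    return false;
  return AccumOffset <= NumVGPRs; // otherwise it exceeds the VGPR allocation
}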
5993bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5995if (ParseAsAbsoluteExpression(Version))
5998 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6002bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(
StringRefID,
6004// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 6005// assembly for backwards compatibility. 6006if (
ID ==
"max_scratch_backing_memory_byte_size") {
6007 Parser.eatToEndOfStatement();
6013if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
6014return TokError(Err.str());
6018if (
ID ==
"enable_wavefront_size32") {
6021return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
6022if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6023return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
6025if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6026return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
6030if (
ID ==
"wavefront_size") {
6031if (
C.wavefront_size == 5) {
6033return TokError(
"wavefront_size=5 is only allowed on GFX10+");
6034if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6035return TokError(
"wavefront_size=5 requires +WavefrontSize32");
6036 }
elseif (
C.wavefront_size == 6) {
6037if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6038return TokError(
"wavefront_size=6 requires +WavefrontSize64");
6045bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6050// Lex EndOfStatement. This is in a while loop, because lexing a comment 6051// will set the current token to EndOfStatement. 6055if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6058if (
ID ==
".end_amd_kernel_code_t")
6061if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6065 KernelCode.
validate(&getSTI(), getContext());
6066 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6071bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6073if (!parseId(KernelName,
"expected symbol name"))
6076 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6079 KernelScope.initialize(getContext());
6083bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6085returnError(getLoc(),
6086".amd_amdgpu_isa directive is not available on non-amdgcn " 6090auto TargetIDDirective = getLexer().getTok().getStringContents();
6091if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6092returnError(getParser().getTok().getLoc(),
"target id must match options");
6094 getTargetStreamer().EmitISAVersion();
6100bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6103 std::string HSAMetadataString;
6108if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6109returnError(getLoc(),
"invalid HSA metadata");
6114/// Common code to parse out a block of text (typically YAML) between start and 6116bool AMDGPUAsmParser::ParseToEndDirective(
constchar *AssemblerDirectiveBegin,
6117constchar *AssemblerDirectiveEnd,
6118 std::string &CollectString) {
6122 getLexer().setSkipSpace(
false);
6124bool FoundEnd =
false;
6127 CollectStream << getTokenStr();
6131if (trySkipId(AssemblerDirectiveEnd)) {
6136 CollectStream << Parser.parseStringToEndOfStatement()
6137 << getContext().getAsmInfo()->getSeparatorString();
6139 Parser.eatToEndOfStatement();
6142 getLexer().setSkipSpace(
true);
6145return TokError(
Twine(
"expected directive ") +
6146Twine(AssemblerDirectiveEnd) +
Twine(
" not found"));
6152/// Parse the assembler directive for new MsgPack-format PAL metadata. 6153bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6159auto *PALMetadata = getTargetStreamer().getPALMetadata();
6160if (!PALMetadata->setFromString(
String))
6161returnError(getLoc(),
"invalid PAL metadata");
6165/// Parse the assembler directive for old linear-format PAL metadata. 6166bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6168returnError(getLoc(),
6170"not available on non-amdpal OSes")).str());
6173auto *PALMetadata = getTargetStreamer().getPALMetadata();
6174 PALMetadata->setLegacy();
6177if (ParseAsAbsoluteExpression(Key)) {
6178return TokError(
Twine(
"invalid value in ") +
6182return TokError(
Twine(
"expected an even number of values in ") +
6185if (ParseAsAbsoluteExpression(
Value)) {
6186return TokError(
Twine(
"invalid value in ") +
6189 PALMetadata->setRegister(Key,
Value);
6196/// ParseDirectiveAMDGPULDS 6197/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 6198bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6199if (getParser().checkForValidSection())
6203SMLoc NameLoc = getLoc();
6204if (getParser().parseIdentifier(
Name))
6205return TokError(
"expected identifier in directive");
6208if (getParser().parseComma())
6214SMLoc SizeLoc = getLoc();
6215if (getParser().parseAbsoluteExpression(
Size))
6218returnError(SizeLoc,
"size must be non-negative");
6219if (
Size > LocalMemorySize)
6220returnError(SizeLoc,
"size is too large");
6222 int64_t Alignment = 4;
6224SMLoc AlignLoc = getLoc();
6225if (getParser().parseAbsoluteExpression(Alignment))
6228returnError(AlignLoc,
"alignment must be a power of two");
6230// Alignment larger than the size of LDS is possible in theory, as long 6231// as the linker manages to place to symbol at address 0, but we do want 6232// to make sure the alignment fits nicely into a 32-bit integer. 6233if (Alignment >= 1u << 31)
6234returnError(AlignLoc,
"alignment is too large");
6240Symbol->redefineIfPossible();
6241if (!
Symbol->isUndefined())
6242returnError(NameLoc,
"invalid symbol redefinition");
6244 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6248bool AMDGPUAsmParser::ParseDirective(
AsmToken DirectiveID) {
6252if (IDVal ==
".amdhsa_kernel")
6253return ParseDirectiveAMDHSAKernel();
6255if (IDVal ==
".amdhsa_code_object_version")
6256return ParseDirectiveAMDHSACodeObjectVersion();
6258// TODO: Restructure/combine with PAL metadata directive. 6260return ParseDirectiveHSAMetadata();
6262if (IDVal ==
".amd_kernel_code_t")
6263return ParseDirectiveAMDKernelCodeT();
6265if (IDVal ==
".amdgpu_hsa_kernel")
6266return ParseDirectiveAMDGPUHsaKernel();
6268if (IDVal ==
".amd_amdgpu_isa")
6269return ParseDirectiveISAVersion();
6273Twine(
" directive is " 6274"not available on non-amdhsa OSes"))
6279if (IDVal ==
".amdgcn_target")
6280return ParseDirectiveAMDGCNTarget();
6282if (IDVal ==
".amdgpu_lds")
6283return ParseDirectiveAMDGPULDS();
6286return ParseDirectivePALMetadataBegin();
6289return ParseDirectivePALMetadata();
6296if (
MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6299// GFX10+ has 2 more SGPRs 104 and 105. 6300if (
MRI.regsOverlap(SGPR104_SGPR105, Reg))
6301return hasSGPR104_SGPR105();
6304case SRC_SHARED_BASE_LO:
6305case SRC_SHARED_BASE:
6306case SRC_SHARED_LIMIT_LO:
6307case SRC_SHARED_LIMIT:
6308case SRC_PRIVATE_BASE_LO:
6309case SRC_PRIVATE_BASE:
6310case SRC_PRIVATE_LIMIT_LO:
6311case SRC_PRIVATE_LIMIT:
6313case SRC_POPS_EXITING_WAVE_ID:
6325return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6339// No flat_scr on SI. 6340// On GFX10Plus flat scratch is not a valid register operand and can only be 6341// accessed with s_setreg/s_getreg. 6352// VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 6354if (
MRI.regsOverlap(SGPR102_SGPR103, Reg))
6355return hasSGPR102_SGPR103();
6367// Try to parse with a custom parser 6368 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6370// If we successfully parsed the operand or if there as an error parsing, 6373// If we are parsing after we reach EndOfStatement then this means we 6374// are appending default values to the Operands list. This is only done 6375// by custom parser, so we shouldn't continue on to the generic parsing. 6380SMLoc LBraceLoc = getLoc();
6388Error(Loc,
"expected a register");
6392 RBraceLoc = getLoc();
6397"expected a comma or a closing square bracket"))
6403 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6404Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
6414// Clear any forced encodings from the previous instruction. 6415 setForcedEncodingSize(0);
6416 setForcedDPP(
false);
6417 setForcedSDWA(
false);
6419if (
Name.ends_with(
"_e64_dpp")) {
6421 setForcedEncodingSize(64);
6422returnName.substr(0,
Name.size() - 8);
6424if (
Name.ends_with(
"_e64")) {
6425 setForcedEncodingSize(64);
6426returnName.substr(0,
Name.size() - 4);
6428if (
Name.ends_with(
"_e32")) {
6429 setForcedEncodingSize(32);
6430returnName.substr(0,
Name.size() - 4);
6432if (
Name.ends_with(
"_dpp")) {
6434returnName.substr(0,
Name.size() - 4);
6436if (
Name.ends_with(
"_sdwa")) {
6437 setForcedSDWA(
true);
6438returnName.substr(0,
Name.size() - 5);
6450// Add the instruction mnemonic 6453// If the target architecture uses MnemonicAlias, call it here to parse 6454// operands correctly. 6457Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, NameLoc));
6459bool IsMIMG =
Name.starts_with(
"image_");
6462 OperandMode Mode = OperandMode_Default;
6464 Mode = OperandMode_NSA;
6468 checkUnsupportedInstruction(
Name, NameLoc);
6469if (!Parser.hasPendingError()) {
6470// FIXME: use real operand location rather than the current location. 6472 :
"not a valid operand.";
6473Error(getLoc(), Msg);
6481// Eat the comma or space if there is one. 6488//===----------------------------------------------------------------------===// 6490//===----------------------------------------------------------------------===// 6495if (!trySkipId(
Name))
6498Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, S));
6502ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
constchar *Prefix,
6513 std::function<
bool(int64_t &)> ConvertResult) {
6521if (ConvertResult && !ConvertResult(
Value)) {
6525Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
6529ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6531bool (*ConvertResult)(int64_t &)) {
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  for (int I = 0; ; ++I) {
    SMLoc Loc = getLoc();
    if (Op != 0 && Op != 1)
    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
                                      AMDGPUOperand::ImmTy ImmTy) {
  if (trySkipId(Name)) {
  } else if (trySkipId("no", Name)) {

    return Error(S, "r128 modifier is not supported on this GPU");
    return Error(S, "a16 modifier is not supported on this GPU");

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
                                     bool &Disabling) const {
  Disabling = Id.consume_front("no");

  SMLoc StringLoc = getLoc();
  int64_t CPolVal = 0;
      ResScope = parseScope(Operands, Scope);
  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                              AMDGPUOperand::ImmTyCPol));
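// Illustrative example (not from the original source, assuming standard GFX12
// syntax): cache-policy modifiers are written as named values, e.g.
//   global_load_b32 v0, v[2:3], off th:TH_LOAD_NT scope:SCOPE_SYS
// while older targets use flag tokens such as glc/slc/dlc handled below.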
  SMLoc OpLoc = getLoc();
  unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
      return Error(S, "dlc modifier is not supported on this GPU");
      return Error(S, "scc modifier is not supported on this GPU");
      return Error(S, "duplicate cache policy modifier");
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},

  if (Value == "TH_DEFAULT")
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
  } else if (Value.consume_front("TH_LOAD_")) {
  } else if (Value.consume_front("TH_STORE_")) {
    return Error(StringLoc, "invalid th value");

  if (Value == "BYPASS")

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");
                                  AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
                                  AMDGPUOperand::ImmTy ImmT,
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);

  StringLoc = getLoc();

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
  SMLoc StringLoc = getLoc();
    Value = getTokenStr();
      if (Value == Ids[IntVal])
    if (IntVal < 0 || IntVal >= (int64_t)Ids.size())

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    AMDGPUOperand::ImmTy Type) {
  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, Val);
  if (Val < 0 || Val > MaxVal) {

                                             AMDGPUOperand::ImmTy ImmTy) {
  const char *Pref = "index_key";
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, ImmVal);
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));

  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
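// Illustrative example (not from the original source): a tbuffer operand may
// spell the two fields in either order, e.g.
//   ... dfmt:4, nfmt:2 ...   or   ... nfmt:2, dfmt:4 ...
// and an omitted field falls back to its default value as computed above.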
ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  if (!tryParseFmt("format", UFMT_MAX, Fmt))
  if (Fmt == UFMT_UNDEF)

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
  if (Format != DFMT_UNDEF) {
  if (Format != NFMT_UNDEF) {
    Error(Loc, "unsupported format");

  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))

  SMLoc Loc = getLoc();
  if (!parseId(Str, "expected a format string") ||
      !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
  if (Dfmt == DFMT_UNDEF)
    return Error(Loc, "duplicate numeric format");
  if (Nfmt == NFMT_UNDEF)
    return Error(Loc, "duplicate data format");

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (Ufmt == UFMT_UNDEF)
    return Error(FormatLoc, "unsupported format");
  if (Id == UFMT_UNDEF)
    return Error(Loc, "unified format is not supported on this GPU");
ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  SMLoc Loc = getLoc();
    return Error(Loc, "out of range format");

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  SMLoc Loc = getLoc();
  if (!parseId(FormatStr, "expected a format string"))

  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);

  return parseNumericFormat(Format);

  SMLoc Loc = getLoc();
  // Parse legacy format syntax.
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
  // We are expecting an soffset operand,
  // but let matcher handle the error.
    Res = parseSymbolicOrNumericFormat(Format);

    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
    assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      return Error(getLoc(), "duplicate format");
      parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
    Res = parseIntWithPrefix("inst_offset", Operands,
                             AMDGPUOperand::ImmTyInstOffset);

      parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
    Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);

      parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
      parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      OperandIdx[SrcIdx] = Inst.size();

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);

    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;

  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {

  for (auto i = 0; i < SrcIdx; ++i) {
    EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
      IntVal = encode(ISA, IntVal, -1);

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = getLoc();
  SMLoc ValLoc = getLoc();

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Error(CntLoc, "invalid counter name " + CntName);
    Error(ValLoc, "too large value for " + CntName);
    Error(getLoc(), "expected a counter name");
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  SMLoc ValueLoc = getLoc();

  if (FieldName == "instid0") {
  } else if (FieldName == "instskip") {
  } else if (FieldName == "instid1") {
    Error(FieldLoc, "invalid field name " + FieldName);
    // Parse values for instskip.
    // Parse values for instid0 and instid1.
                    .Case("VALU_DEP_1", 1)
                    .Case("VALU_DEP_2", 2)
                    .Case("VALU_DEP_3", 3)
                    .Case("VALU_DEP_4", 4)
                    .Case("TRANS32_DEP_1", 5)
                    .Case("TRANS32_DEP_2", 6)
                    .Case("TRANS32_DEP_3", 7)
                    .Case("FMA_ACCUM_CYCLE_1", 8)
                    .Case("SALU_CYCLE_1", 9)
                    .Case("SALU_CYCLE_2", 10)
                    .Case("SALU_CYCLE_3", 11)

  Delay |= Value << Shift;

    if (!parseDelay(Delay))

  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
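// Illustrative example (not from the original source, field names as parsed
// above):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
// each field value is shifted into its position in the packed delay operand.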
AMDGPUOperand::isSWaitCnt() const {

bool AMDGPUOperand::isSDelayALU() const { return isImm(); }

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    Error(Loc, Twine("invalid value for ", DepCtrName));

bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
  SMLoc DepCtrLoc = getLoc();

    unsigned PrevOprMask = UsedOprMask;
    int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
      depCtrError(DepCtrLoc, CntVal, DepCtrName);
    Error(getLoc(), "expected a counter name");

    unsigned CntValMask = PrevOprMask ^ UsedOprMask;
    DepCtr = (DepCtr & ~CntValMask) | CntVal;

  SMLoc Loc = getLoc();
  unsigned UsedOprMask = 0;
    if (!parseDepCtr(DepCtr, UsedOprMask))

  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));

bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
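// Illustrative example (assumed syntax, not from the original source): the
// dependency-counter operand is written as named fields, e.g.
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_sa_sdst(0)
// and each field updates only its own bits of the packed value via the mask.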
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Width) {
  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))

  Width.Loc = getLoc();

  SMLoc Loc = getLoc();
  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
        return Error(Parser, "only values from 1 to 32 are legal");
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);

    Res = parseHwregFunc(HwReg, Offset, Width);
      if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
      ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
             parseExpr(ImmVal, "a hwreg macro, structured immediate"))

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
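// Illustrative example (not from the original source): the structured form
// uses the "id", "offset" and "size" fields parsed above, e.g.
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// a raw 16-bit immediate may be given instead of the macro.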
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Stream) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Val, "a message name")) {

        (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
      lex(); // skip operation name
    } else if (!parseExpr(Op.Val, "an operation name")) {

      Stream.IsDefined = true;
      Stream.Loc = getLoc();

AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

    Error(Msg.Loc, "specified message id is not supported on this GPU");
    Error(Msg.Loc, "invalid message id");
    Error(Op.Loc, "message does not support operations");
    Error(Msg.Loc, "missing message operation");
    Error(Op.Loc, "specified operation id is not supported on this GPU");
    Error(Op.Loc, "invalid operation id");
    Error(Stream.Loc, "message operation does not support streams");
    Error(Stream.Loc, "invalid message stream id");

  SMLoc Loc = getLoc();
  OperandInfoTy Op(OP_NONE_);
  OperandInfoTy Stream(STREAM_ID_NONE_);
  if (parseSendMsgBody(Msg, Op, Stream) &&
      validateSendMsg(Msg, Op, Stream)) {
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
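// Illustrative example (not from the original source): a symbolic message
// operand looks like
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// numeric forms are accepted too, but then only encodability is checked.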
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

    return Error(S, "invalid interpolation slot");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

    return Error(S, "invalid or missing interpolation attribute channel");

  Str = Str.drop_back(2).drop_front(4);

  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

    return Error(S, "out of bounds interpolation attribute number");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

  return isId(getToken(), Id);

  return getTokenKind() == Kind;

StringRef AMDGPUAsmParser::getId() const {

  if (isId(Id) && peekToken().is(Kind)) {

  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(Imm))
    Error(S, "expected absolute expression");
          Twine(" or an absolute expression"));

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

    Val = getToken().getStringContents();
  Error(getLoc(), ErrMsg);

    Val = getTokenStr();
  Error(getLoc(), ErrMsg);

AMDGPUAsmParser::getToken() const {
  return Parser.getTok();

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
      : getLexer().peekTok(ShouldSkipSpace);

  auto TokCount = getLexer().peekTokens(Tokens);

AMDGPUAsmParser::getTokenKind() const {

AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();

AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();

AMDGPUAsmParser::lex() {

  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      return Op.getStartLoc();

AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
  auto Test = [=](const AMDGPUOperand &Op) { return Op.isImmTy(Type); };

  auto Test = [=](const AMDGPUOperand &Op) {
    return Op.isRegKind() && Op.getReg() == Reg;

                               bool SearchMandatoryLiterals) const {
  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
    Loc = getMandatoryLitLoc(Operands);

  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindMandatoryLiteral();

  auto Test = [](const AMDGPUOperand &Op) {
    return Op.isImmKindConst();

  SMLoc IdLoc = getLoc();
      find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
  if (I == Fields.end())
    return Error(IdLoc, "unknown field");
    return Error(IdLoc, "duplicate field");

  // TODO: Support symbolic values.
  (*I)->Loc = getLoc();
  (*I)->IsDefined = true;

bool AMDGPUAsmParser::validateStructuredOpFields(
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

                                          const unsigned OrMask,
                                          const unsigned XorMask) {
  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);

bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                                          const unsigned MaxVal,
  if (Op < MinVal || Op > MaxVal) {

AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))

AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {

AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");
  if (parseSwizzleOperand(LaneIdx,
                          "lane id must be in the interval [0,group size - 1]",

AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");

AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [1,16]",
    Error(Loc, "group size must be a power of two");

AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");

  unsigned AndMask = 0;
  unsigned XorMask = 0;
  for (size_t i = 0; i < Ctl.size(); ++i) {
      Error(StrLoc, "invalid mask");

bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +

bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
        (RotateSize << ROTATE_SIZE_SHIFT);

AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  SMLoc ModeLoc = getLoc();

  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
    Ok = parseSwizzleQuadPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
    Ok = parseSwizzleBitmaskPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
    Ok = parseSwizzleBroadcast(Imm);
  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
    Ok = parseSwizzleSwap(Imm);
  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
    Ok = parseSwizzleReverse(Imm);
  } else if (trySkipId(IdSymbolic[ID_FFT])) {
    Ok = parseSwizzleFFT(Imm);
  } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
    Ok = parseSwizzleRotate(Imm);
    Error(ModeLoc, "expected a swizzle mode");

  if (trySkipId("offset")) {
    if (trySkipId("swizzle")) {
      Ok = parseSwizzleMacro(Imm);
      Ok = parseSwizzleOffset(Imm);

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S,
                                                AMDGPUOperand::ImmTySwizzle));

AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
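// Illustrative example (not from the original source): the swizzle macro is
// given through the offset operand, e.g.
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
// a plain 16-bit offset value may be used instead of the macro.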
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
    if (trySkipId(IdSymbolic[ModeId])) {
                  "expected a VGPR index mode or a closing parenthesis" :
                  "expected a VGPR index mode");
      Error(S, "duplicate VGPR index mode");
                   "expected a comma or a closing parenthesis"))

    Imm = parseGPRIdxMacro();
    if (getParser().parseAbsoluteExpression(Imm))
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");

      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
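// Illustrative example (not from the original source): the index-mode macro
// is written as
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// or as a raw 4-bit immediate mode mask.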
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())

  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());

bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by validator.
  return isImmLiteral();

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  getTargetStreamer().EmitDirectiveAMDGCNTarget();
/// Parse AMDGPU specific expressions.
///
///  expr ::= or(expr, ...) |
///
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
                     .Case("max", AGVK::AGVK_Max)
                     .Case("or", AGVK::AGVK_Or)
                     .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                     .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                     .Case("alignto", AGVK::AGVK_AlignTo)
                     .Case("occupancy", AGVK::AGVK_Occupancy)

    lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
        Error(getToken().getLoc(),
              "empty " + Twine(TokenId) + " expression");
      if (CommaCount + 1 != Exprs.size()) {
        Error(getToken().getLoc(),
              "mismatch of commas in " + Twine(TokenId) + " expression");

      if (getParser().parseExpression(Expr, EndLoc))
      if (LastTokenWasComma)
        Error(getToken().getLoc(),
              "unexpected token in " + Twine(TokenId) + " expression");

  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);

  return parseIntWithPrefix("mul", Operands,
  return parseIntWithPrefix("div", Operands,
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };

      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {

  if ((OpSel & (1 << SrcNum)) != 0)

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);

      // 1. This operand is input modifiers
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyHigh);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  for (int J = 0; J < 3; ++J) {
    if ((OpSel & (1 << J)) != 0)
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
                               OptionalImmIndexMap &OptionalIdx) {
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
      Op.addRegOrImmOperands(Inst, 1);

                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  auto *it = Inst.begin();
  // Copy the operand to ensure it's not invalidated when Inst grows.

  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
                               OptionalImmIndexMap &OptIdx) {
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {

  // Adding vdst_in operand is already covered for these DPP instructions in
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {

  if (BitOp3Idx != -1) {

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  if (OpSelIdx != -1) {
  if (OpSelHiIdx != -1) {

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSelHi = 0;
  if (OpSelHiIdx != -1)

  for (int J = 0; J < 3; ++J) {
    if (SrcOp.isReg() && getMRI()
    if ((OpSel & (1 << J)) != 0)
    if ((OpSelHi & (1 << J)) != 0)
    if ((NegLo & (1 << J)) != 0)
    if ((NegHi & (1 << J)) != 0)

  OptionalImmIndexMap OptIdx;
                                  unsigned i, unsigned Opc, unsigned OpName) {
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;

                        AMDGPUOperand::ImmTyIndexKey8bit);
                        AMDGPUOperand::ImmTyIndexKey16bit);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
  SMLoc OpYLoc = getLoc();
    Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
  return Error(OpYLoc, "expected a VOPDY instruction after ::");

// Create VOPD MCInst operands using parsed assembler operands.
  auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
      Op.addRegOperands(Inst, 1);
      Op.addImmOperands(Inst, 1);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());

    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
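// Illustrative example (not from the original source): a VOPD pair joins two
// VALU operations with the "::" token parsed above, e.g.
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
// and MC operands are emitted as dstX, dstY, then the X and Y source lists.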
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  SMLoc Loc = getToken().getEndLoc();
  Token = std::string(getTokenStr());
  if (!parseId(Suffix))

  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
  for (size_t i = 0; i < 8; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");

  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
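// Illustrative example (not from the original source): the selector lists one
// 3-bit source lane per lane of the group of eight, e.g.
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]
// which this code packs into a single immediate, three bits per lane.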
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
  if (Ctrl == "row_newbcast")
  if (Ctrl == "row_share" ||
  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]
  for (int i = 0; i < 4; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
    Val += (Temp << i * 2);

AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  SMLoc Loc = getLoc();
  if (getParser().parseAbsoluteExpression(Val))

      .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
      .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
      .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
      .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
      .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
      .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
      .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
      .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
      .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
      .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})

  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  using namespace AMDGPU::DPP;

      !isSupportedDPPCtrl(getTokenStr(), Operands))

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
    if (Ctrl == "quad_perm") {
      Val = parseDPPCtrlPerm();
      Val = parseDPPCtrlSel(Ctrl);

      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
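// Illustrative example (not from the original source): dpp_ctrl is written as
// a named selector, e.g.
//   v_add_f32_dpp v0, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// and the selector is folded into the single ImmTyDppCtrl immediate above.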
  OptionalImmIndexMap OptionalIdx;

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers

    bool IsVOP3CvtSrDpp =
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
    if (IsVOP3CvtSrDpp) {

    // handle tied old or src2 for MAC instructions
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyClamp);

    cvtVOP3P(Inst, Operands, OptionalIdx);
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);

                        AMDGPUOperand::ImmTyDppFI);
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    // handle tied old or src2 for MAC instructions
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      Op.addImmOperands(Inst, 1);
        Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDppFI()) {
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
        Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyDppFI);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

                                            AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},

  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;

  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
                        AMDGPUOperand::ImmTyClamp, 0);
                        AMDGPUOperand::ImmTyOModSI, 0);
                        AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
                        AMDGPUOperand::ImmTySDWADstUnused,
                        DstUnused::UNUSED_PRESERVE);
                        AMDGPUOperand::ImmTyClamp, 0);
                        AMDGPUOperand::ImmTyClamp, 0);
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
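// Illustrative example (not from the original source): SDWA selectors choose
// sub-dword lanes of each operand, e.g.
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:WORD_1
// each selector becomes one of the optional immediate operands handled above.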
/// Force static initialization.

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

    return parseTokenOp("addr64", Operands);
    return parseTokenOp("done", Operands);
    return parseTokenOp("idxen", Operands);
    return parseTokenOp("lds", Operands);
    return parseTokenOp("offen", Operands);
    return parseTokenOp("off", Operands);
    return parseTokenOp("row_en", Operands);
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  return tryCustomParseOperand(Operands, MCK);
// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined

  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands or larger.
    // The following code enables it for SReg_64 and larger operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
    return Match_InvalidOperand;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

  // The operand is optional, if not present default to 0
  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned const MachineRegisterInfo * MRI
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, unsigned OpName)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
support::ulittle16_t & Lo
support::ulittle16_t & Hi
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Target independent representation for an assembler token.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
TokenKind getKind() const
This class represents an Operation in the Expression.
Base class for user error types.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
MCAsmParser & getParser()
Generic assembler parser interface, for use by target specific assembly parsers.
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
Streaming machine code generation interface.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
Parse one assembly instruction.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target-specific assembler directive. This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - Parse one register if possible.
virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
Recognize a series of operands of a parsed instruction as an actual MCInst and emit it to the specifi...
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
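A minimal skeleton (declarations only) of a target parser overriding the hooks listed above; "MyAsmParser" is a hypothetical class name, and the constructor plus registration via RegisterMCAsmParser (listed near the end of this index) are omitted.
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
using namespace llvm;

class MyAsmParser : public MCTargetAsmParser {
public:
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
};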
Target specific streamer interface.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
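A minimal sketch of the ternary protocol: NoMatch means "not my syntax, try another parser", Failure means "my syntax but malformed, error already diagnosed". looksLikeFoo() and consumeFoo() are hypothetical helpers, not real functions.
ParseStatus parseFoo() {
  if (!looksLikeFoo())
    return ParseStatus::NoMatch;   // nothing consumed; caller tries alternatives
  if (!consumeFoo())
    return ParseStatus::Failure;   // hard error; stop matching this operand
  return ParseStatus::Success;
}

// Caller side:
//   ParseStatus Res = parseFoo();
//   if (Res.isNoMatch()) { /* fall through to the next candidate parser */ }
//   else if (Res.isFailure()) { /* error was already reported */ }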
Wrapper class representing virtual and physical registers.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
Represents a range in source code.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
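A minimal sketch exercising the StringRef calls above; the "v"/"_hi" token shapes are illustrative, not the parser's actual register syntax rules.
#include "llvm/ADT/StringRef.h"
using llvm::StringRef;

bool splitRegToken(StringRef Tok) {
  if (!Tok.starts_with("v"))         // must begin with the prefix
    return false;
  StringRef Num = Tok.drop_front(1); // "v12" -> "12"
  Num.consume_back("_hi");           // strip an optional suffix if present
  return !Num.empty() && Num.size() <= 3;
}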
StringSet - A wrapper for StringMap that provides set-like functionality.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
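A minimal sketch of the MTBUF format helpers above, assuming they live in the llvm::AMDGPU::MTBUFFormat namespace as in AMDGPUBaseInfo and that a negative return value signals an unknown name (an assumption, not verified here).
int64_t lookupAndEncodeFormat(llvm::StringRef DfmtName, llvm::StringRef NfmtName,
                              const llvm::MCSubtargetInfo &STI) {
  int64_t Dfmt = getDfmt(DfmtName);
  int64_t Nfmt = getNfmt(NfmtName, STI);
  if (Dfmt < 0 || Nfmt < 0)             // assumed "name not found" sentinel
    return -1;
  return encodeDfmtNfmt(Dfmt, Nfmt);    // pack both fields into one immediate
}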
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
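A minimal sketch of building an s_sendmsg immediate from symbolic names with the helpers above, assuming they live in llvm::AMDGPU::SendMsg as in AMDGPUBaseInfo; the ~0ULL "invalid" sentinel is a placeholder chosen for this sketch.
uint64_t encodeNamedMsg(llvm::StringRef Msg, llvm::StringRef Op, uint64_t StreamId,
                        const llvm::MCSubtargetInfo &STI) {
  int64_t MsgId = getMsgId(Msg, STI);
  int64_t OpId = getMsgOpId(MsgId, Op, STI);
  if (!isValidMsgId(MsgId, STI) || !isValidMsgOp(MsgId, OpId, STI, /*Strict=*/true))
    return ~0ULL;                                      // placeholder error value
  if (msgSupportsStream(MsgId, OpId, STI) &&
      !isValidMsgStream(MsgId, OpId, StreamId, STI, /*Strict=*/true))
    return ~0ULL;
  return encodeMsg(MsgId, OpId, StreamId);             // pack the immediate
}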
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Return true if Reg is a scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
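A minimal sketch of checking whether an immediate offset fits the FLAT offset field; whether the signed or unsigned check applies depends on the generation and instruction, so this only shows the width test.
bool flatOffsetFits(const llvm::MCSubtargetInfo &ST, int64_t Offset, bool Signed) {
  unsigned Bits = llvm::AMDGPU::getNumFlatOffsetBits(ST);
  return Signed ? llvm::isIntN(Bits, Offset) : llvm::isUIntN(Bits, Offset);
}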
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isGFX9(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating-point operands.
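A minimal sketch of deciding whether a 64-bit value is encodable at all as a source, using the inline-literal helpers listed here (llvm::AMDGPU namespace assumed, as in AMDGPUBaseInfo).
bool isEncodableSrc64(int64_t V, bool HasInv2Pi, bool IsFP64) {
  // Either an inline constant (small integers and a fixed set of FP values)
  // or a value that fits the instruction's single 32-bit literal field.
  return llvm::AMDGPU::isInlinableLiteral64(V, HasInv2Pi) ||
         llvm::AMDGPU::isValid32BitLiteral(uint64_t(V), IsFP64);
}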
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
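A minimal sketch of packing counter values into an s_waitcnt immediate with the encode/decode helpers in this index, starting from the all-ones "wait for nothing" mask; Vm/Exp/Lgkm are caller-supplied counts.
unsigned buildWaitcnt(const llvm::AMDGPU::IsaVersion &Version, unsigned Vm,
                      unsigned Exp, unsigned Lgkm) {
  unsigned W = llvm::AMDGPU::getWaitcntBitMask(Version); // every field at maximum
  W = llvm::AMDGPU::encodeVmcnt(Version, W, Vm);
  W = llvm::AMDGPU::encodeExpcnt(Version, W, Exp);
  W = llvm::AMDGPU::encodeLgkmcnt(Version, W, Lgkm);
  return W;
}
// The IsaVersion comes from getIsaVersion(STI.getCPU()), and individual fields
// can be read back with e.g. decodeLgkmcnt(Version, W).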
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return a predicate consisting of the specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Format
The format used for serializing/deserializing remarks.
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
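A minimal sketch exercising the integer helpers listed in this index; the constants are arbitrary examples.
#include "llvm/Support/MathExtras.h"
using namespace llvm;

void splitAndRound() {
  uint64_t V = 0x123456789abcdef0ULL;
  uint32_t Hi = Hi_32(V);                                   // 0x12345678
  uint32_t Lo = Lo_32(V);                                   // 0x9abcdef0
  bool FitsU20 = isUIntN(20, Lo);                           // false: needs 32 bits
  uint64_t Groups = divideCeil(uint64_t(100), uint64_t(32)); // 4 (round-up divide)
  (void)Hi; (void)FitsU20; (void)Groups;
}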
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
const char * toString(DWARFSectionKind Kind)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
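A minimal sketch, assuming MCKernelDescriptor from the AMDGPU MCTargetDesc layer as listed above; the Shift/Mask values are placeholders rather than real AMDHSA field offsets, and bits_set is assumed (from its name and parameters) to splice Value into the Mask-selected bits of Dst.
void tweakRsrc1(const llvm::MCSubtargetInfo *STI, llvm::MCContext &Ctx) {
  using llvm::AMDGPU::MCKernelDescriptor;
  MCKernelDescriptor KD =
      MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(STI, Ctx);
  const llvm::MCExpr *One = llvm::MCConstantExpr::create(1, Ctx);
  MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc1, One,
                               /*Shift=*/0, /*Mask=*/0x3f, Ctx); // placeholder field
}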
Represents the counter values to wait for in an s_waitcnt instruction.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
Direction
An enum for the direction of the loop.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size