- Notifications
You must be signed in to change notification settings - Fork 14.5k
[RISCV] Separate the analysis part of RISCVInsertVSETVLI. #149574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Mikhail Gudim (mgudim) Changes: This analysis can be reused in other places. Patch is 105.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149574.diff 7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txtindex 47329b2c2f4d2..427f69f7c5597 100644--- a/llvm/lib/Target/RISCV/CMakeLists.txt+++ b/llvm/lib/Target/RISCV/CMakeLists.txt@@ -65,6 +65,7 @@ add_llvm_target(RISCVCodeGen RISCVTargetMachine.cpp RISCVTargetObjectFile.cpp RISCVTargetTransformInfo.cpp+ RISCVVConfigAnalysis.cpp RISCVVectorMaskDAGMutation.cpp RISCVVectorPeephole.cpp RISCVVLOptimizer.cppdiff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.hindex ae9410193efe1..f004b2c75a6d4 100644--- a/llvm/lib/Target/RISCV/RISCV.h+++ b/llvm/lib/Target/RISCV/RISCV.h@@ -111,6 +111,8 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &); FunctionPass *createRISCVPreLegalizerCombiner(); void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);+void initializeRISCVVConfigWrapperPassPass(PassRegistry &);+ FunctionPass *createRISCVVLOptimizerPass(); void initializeRISCVVLOptimizerPass(PassRegistry &);diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cppindex 90e1c47a71c89..c3e136171c5c1 100644--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp@@ -9,23 +9,11 @@ // This file implements a function pass that inserts VSETVLI instructions where // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL // instructions.-//-// This pass consists of 3 phases:-//-// Phase 1 collects how each basic block affects VL/VTYPE.-//-// Phase 2 uses the information from phase 1 to do a data flow analysis to-// propagate the VL/VTYPE changes through the function. This gives us the-// VL/VTYPE at the start of each basic block.-//-// Phase 3 inserts VSETVLI instructions in each basic block. 
Information from-// phase 2 is used to prevent inserting a VSETVLI before the first vector-// instruction in the block if possible.-// //===----------------------------------------------------------------------===// #include "RISCV.h" #include "RISCVSubtarget.h"+#include "RISCVVConfigAnalysis.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveDebugVariables.h"@@ -49,814 +37,18 @@ static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE( namespace {-/// Given a virtual register \p Reg, return the corresponding VNInfo for it.-/// This will return nullptr if the virtual register is an implicit_def or-/// if LiveIntervals is not available.-static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,- const LiveIntervals *LIS) {- assert(Reg.isVirtual());- if (!LIS)- return nullptr;- auto &LI = LIS->getInterval(Reg);- SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);- return LI.getVNInfoBefore(SI);-}- static unsigned getVLOpNum(const MachineInstr &MI) { return RISCVII::getVLOpNum(MI.getDesc()); }-static unsigned getSEWOpNum(const MachineInstr &MI) {- return RISCVII::getSEWOpNum(MI.getDesc());-}--/// Get the EEW for a load or store instruction. Return std::nullopt if MI is-/// not a load or store which ignores SEW.-static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {- switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {- default:- return std::nullopt;- case RISCV::VLE8_V:- case RISCV::VLSE8_V:- case RISCV::VSE8_V:- case RISCV::VSSE8_V:- return 8;- case RISCV::VLE16_V:- case RISCV::VLSE16_V:- case RISCV::VSE16_V:- case RISCV::VSSE16_V:- return 16;- case RISCV::VLE32_V:- case RISCV::VLSE32_V:- case RISCV::VSE32_V:- case RISCV::VSSE32_V:- return 32;- case RISCV::VLE64_V:- case RISCV::VLSE64_V:- case RISCV::VSE64_V:- case RISCV::VSSE64_V:- return 64;- }-}--/// Return true if this is an operation on mask registers. 
Note that-/// this includes both arithmetic/logical ops and load/store (vlm/vsm).-static bool isMaskRegOp(const MachineInstr &MI) {- if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))- return false;- const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();- // A Log2SEW of 0 is an operation on mask registers only.- return Log2SEW == 0;-}--/// Return true if the inactive elements in the result are entirely undefined.-/// Note that this is different from "agnostic" as defined by the vector-/// specification. Agnostic requires each lane to either be undisturbed, or-/// take the value -1; no other value is allowed.-static bool hasUndefinedPassthru(const MachineInstr &MI) {-- unsigned UseOpIdx;- if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))- // If there is no passthrough operand, then the pass through- // lanes are undefined.- return true;-- // All undefined passthrus should be $noreg: see- // RISCVDAGToDAGISel::doPeepholeNoRegPassThru- const MachineOperand &UseMO = MI.getOperand(UseOpIdx);- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();-}--/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.-static bool isVectorCopy(const TargetRegisterInfo *TRI,- const MachineInstr &MI) {- return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&- RISCVRegisterInfo::isRVVRegClass(- TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));-}--/// Which subfields of VL or VTYPE have values we need to preserve?-struct DemandedFields {- // Some unknown property of VL is used. If demanded, must preserve entire- // value.- bool VLAny = false;- // Only zero vs non-zero is used. 
If demanded, can change non-zero values.- bool VLZeroness = false;- // What properties of SEW we need to preserve.- enum : uint8_t {- SEWEqual = 3, // The exact value of SEW needs to be preserved.- SEWGreaterThanOrEqualAndLessThan64 =- 2, // SEW can be changed as long as it's greater- // than or equal to the original value, but must be less- // than 64.- SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater- // than or equal to the original value.- SEWNone = 0 // We don't need to preserve SEW at all.- } SEW = SEWNone;- enum : uint8_t {- LMULEqual = 2, // The exact value of LMUL needs to be preserved.- LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.- LMULNone = 0 // We don't need to preserve LMUL at all.- } LMUL = LMULNone;- bool SEWLMULRatio = false;- bool TailPolicy = false;- bool MaskPolicy = false;- // If this is true, we demand that VTYPE is set to some legal state, i.e. that- // vill is unset.- bool VILL = false;-- // Return true if any part of VTYPE was used- bool usedVTYPE() const {- return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;- }-- // Return true if any property of VL was used- bool usedVL() {- return VLAny || VLZeroness;- }-- // Mark all VTYPE subfields and properties as demanded- void demandVTYPE() {- SEW = SEWEqual;- LMUL = LMULEqual;- SEWLMULRatio = true;- TailPolicy = true;- MaskPolicy = true;- VILL = true;- }-- // Mark all VL properties as demanded- void demandVL() {- VLAny = true;- VLZeroness = true;- }-- static DemandedFields all() {- DemandedFields DF;- DF.demandVTYPE();- DF.demandVL();- return DF;- }-- // Make this the result of demanding both the fields in this and B.- void doUnion(const DemandedFields &B) {- VLAny |= B.VLAny;- VLZeroness |= B.VLZeroness;- SEW = std::max(SEW, B.SEW);- LMUL = std::max(LMUL, B.LMUL);- SEWLMULRatio |= B.SEWLMULRatio;- TailPolicy |= B.TailPolicy;- MaskPolicy |= B.MaskPolicy;- VILL |= B.VILL;- }--#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)- /// Support for 
debugging, callable in GDB: V->dump()- LLVM_DUMP_METHOD void dump() const {- print(dbgs());- dbgs() << "\n";- }-- /// Implement operator<<.- void print(raw_ostream &OS) const {- OS << "{";- OS << "VLAny=" << VLAny << ", ";- OS << "VLZeroness=" << VLZeroness << ", ";- OS << "SEW=";- switch (SEW) {- case SEWEqual:- OS << "SEWEqual";- break;- case SEWGreaterThanOrEqual:- OS << "SEWGreaterThanOrEqual";- break;- case SEWGreaterThanOrEqualAndLessThan64:- OS << "SEWGreaterThanOrEqualAndLessThan64";- break;- case SEWNone:- OS << "SEWNone";- break;- };- OS << ", ";- OS << "LMUL=";- switch (LMUL) {- case LMULEqual:- OS << "LMULEqual";- break;- case LMULLessThanOrEqualToM1:- OS << "LMULLessThanOrEqualToM1";- break;- case LMULNone:- OS << "LMULNone";- break;- };- OS << ", ";- OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";- OS << "TailPolicy=" << TailPolicy << ", ";- OS << "MaskPolicy=" << MaskPolicy << ", ";- OS << "VILL=" << VILL;- OS << "}";- }-#endif-};--#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)-LLVM_ATTRIBUTE_USED-inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {- DF.print(OS);- return OS;-}-#endif--static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {- auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);- return Fractional || LMul == 1;-}--/// Return true if moving from CurVType to NewVType is-/// indistinguishable from the perspective of an instruction (or set-/// of instructions) which use only the Used subfields and properties.-static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,- const DemandedFields &Used) {- switch (Used.SEW) {- case DemandedFields::SEWNone:- break;- case DemandedFields::SEWEqual:- if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))- return false;- break;- case DemandedFields::SEWGreaterThanOrEqual:- if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))- return false;- break;- case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:- if (RISCVVType::getSEW(NewVType) < 
RISCVVType::getSEW(CurVType) ||- RISCVVType::getSEW(NewVType) >= 64)- return false;- break;- }-- switch (Used.LMUL) {- case DemandedFields::LMULNone:- break;- case DemandedFields::LMULEqual:- if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))- return false;- break;- case DemandedFields::LMULLessThanOrEqualToM1:- if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))- return false;- break;- }-- if (Used.SEWLMULRatio) {- auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),- RISCVVType::getVLMUL(CurVType));- auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),- RISCVVType::getVLMUL(NewVType));- if (Ratio1 != Ratio2)- return false;- }-- if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=- RISCVVType::isTailAgnostic(NewVType))- return false;- if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=- RISCVVType::isMaskAgnostic(NewVType))- return false;- return true;-}--/// Return the fields and properties demanded by the provided instruction.-DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {- // This function works in coalesceVSETVLI too. 
We can still use the value of a- // SEW, VL, or Policy operand even though it might not be the exact value in- // the VL or VTYPE, since we only care about what the instruction originally- // demanded.-- // Most instructions don't use any of these subfeilds.- DemandedFields Res;- // Start conservative if registers are used- if (MI.isCall() || MI.isInlineAsm() ||- MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))- Res.demandVL();- if (MI.isCall() || MI.isInlineAsm() ||- MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))- Res.demandVTYPE();- // Start conservative on the unlowered form too- uint64_t TSFlags = MI.getDesc().TSFlags;- if (RISCVII::hasSEWOp(TSFlags)) {- Res.demandVTYPE();- if (RISCVII::hasVLOp(TSFlags))- if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));- !VLOp.isReg() || !VLOp.isUndef())- Res.demandVL();-- // Behavior is independent of mask policy.- if (!RISCVII::usesMaskPolicy(TSFlags))- Res.MaskPolicy = false;- }-- // Loads and stores with implicit EEW do not demand SEW or LMUL directly.- // They instead demand the ratio of the two which is used in computing- // EMUL, but which allows us the flexibility to change SEW and LMUL- // provided we don't change the ratio.- // Note: We assume that the instructions initial SEW is the EEW encoded- // in the opcode. 
This is asserted when constructing the VSETVLIInfo.- if (getEEWForLoadStore(MI)) {- Res.SEW = DemandedFields::SEWNone;- Res.LMUL = DemandedFields::LMULNone;- }-- // Store instructions don't use the policy fields.- if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {- Res.TailPolicy = false;- Res.MaskPolicy = false;- }-- // If this is a mask reg operation, it only cares about VLMAX.- // TODO: Possible extensions to this logic- // * Probably ok if available VLMax is larger than demanded- // * The policy bits can probably be ignored..- if (isMaskRegOp(MI)) {- Res.SEW = DemandedFields::SEWNone;- Res.LMUL = DemandedFields::LMULNone;- }-- // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.- if (RISCVInstrInfo::isScalarInsertInstr(MI)) {- Res.LMUL = DemandedFields::LMULNone;- Res.SEWLMULRatio = false;- Res.VLAny = false;- // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't- // need to preserve any other bits and are thus compatible with any larger,- // etype and can disregard policy bits. Warning: It's tempting to try doing- // this for any tail agnostic operation, but we can't as TA requires- // tail lanes to either be the original value or -1. 
We are writing- // unknown bits to the lanes here.- if (hasUndefinedPassthru(MI)) {- if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&- !ST->hasVInstructionsF64())- Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;- else- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;- Res.TailPolicy = false;- }- }-- // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.- if (RISCVInstrInfo::isScalarExtractInstr(MI)) {- assert(!RISCVII::hasVLOp(TSFlags));- Res.LMUL = DemandedFields::LMULNone;- Res.SEWLMULRatio = false;- Res.TailPolicy = false;- Res.MaskPolicy = false;- }-- if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {- const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));- // A slidedown/slideup with an *undefined* passthru can freely clobber- // elements not copied from the source vector (e.g. masked off, tail, or- // slideup's prefix). Notes:- // * We can't modify SEW here since the slide amount is in units of SEW.- // * VL=1 is special only because we have existing support for zero vs- // non-zero VL. We could generalize this if we had a VL > C predicate.- // * The LMUL1 restriction is for machines whose latency may depend on LMUL.- // * As above, this is only legal for tail "undefined" not "agnostic".- // * We avoid increasing vl if the subtarget has +vl-dependent-latency- if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&- VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&- !ST->hasVLDependentLatency()) {- Res.VLAny = false;- Res.VLZeroness = true;- Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;- Res.TailPolicy = false;- }-- // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the- // same semantically as vmv.s.x. This is particularly useful since we don't- // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in- // it's place. 
Since a splat is non-constant time in LMUL, we do need to be- // careful to not increase the number of active vector registers (unlike for- // vmv.s.x.)- if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&- VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&- !ST->hasVLDependentLatency()) {- Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;- Res.SEWLMULRatio = false;- Res.VLAny = false;- if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&- !ST->hasVInstructionsF64())- Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;- else- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;- Res.TailPolicy = false;- }- }-- // In §32.16.6, whole vector register moves have a dependency on SEW. At the- // MIR level though we don't encode the element type, and it gives the same- // result whatever the SEW may be.- //- // However it does need valid SEW, i.e. vill must be cleared. The entry to a- // function, calls and inline assembly may all set it, so make sure we clear- // it for whole register copies. 
Do this by leaving VILL demanded.- if (isVectorCopy(ST->getRegisterInfo(), MI)) {- Res.LMUL = DemandedFields::LMULNone;- Res.SEW = DemandedFields::SEWNone;- Res.SEWLMULRatio = false;- Res.TailPolicy = false;- Res.MaskPolicy = false;- }-- if (RISCVInstrInfo::isVExtractInstr(MI)) {- assert(!RISCVII::hasVLOp(TSFlags));- // TODO: LMUL can be any larger value (without cost)- Res.TailPolicy = false;- }-- return Res;-}--/// Defines the abstract state with which the forward dataflow models the-/// values of the VL and VTYPE registers after insertion.-class VSETVLIInfo {- struct AVLDef {- // Every AVLDef should have a VNInfo, unless we're running without- // LiveIntervals in which case this will be nullptr.- const VNInfo *ValNo;- Register DefReg;- };- union {- AVLDef AVLRegDef;- unsigned AVLImm;- };-- enum : uint8_t {- Uninitialized,- AVLIsReg,- AVLIsImm,- AVLIsVLMAX,- Unknown, // AVL and VTYPE are fully unknown- } State = Uninitialized;-- // Fields from VTYPE.- RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;- uint8_t SEW = 0;- uint8_t TailAgnostic : 1;- uint8_t MaskAgnostic : 1;- uint8_t SEWLMULRatioOnly : 1;--public:- VSETVLIInfo()- : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),- SEWLMULRatioOnly(false) {}-- static VSETVLIInfo getUnknown() {- VSETVLIInfo Info;- Info.setUnknown();- return Info;- }-- bool isValid() const { return State != Uninitialized; }- void setUnknown() { State = Unknown; }- bool isUnknown() const { return State == Unknown; }-- void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {- assert(AVLReg.isVirtual());- AVLRegDef.ValNo = VNInfo;- AVLRegDef.DefReg = AVLReg;- State = AVLIsReg;- }-- void setAVLImm(unsigned Imm) {- AVLImm = Imm;- State = AVLIsImm;- }-- void setAVLVLMAX() { State = AVLIsVLMAX; }-- bool hasAVLImm() const { return State == AVLIsImm; }- bool hasAVLReg() const { return State == AVLIsReg; }- bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }- Register getAVLReg() const {- assert(hasAVLReg() && 
AVLRegDef.DefReg.isVirtual());- return AVLRegDef.DefReg;- }- unsigned getAVLImm() const {- assert(hasAVLImm());- return AVLImm;- }- const VNInfo *getAVLVNInfo() const {- assert(hasAVLReg());- return AVLRegDef.ValNo;- }- // Most AVLIsReg infos will have a single defining MachineInstr, unless it was- // a PHI node. In that case getAVLVNInfo()->def will point to the block- // boundary slot and this will return nullptr. If LiveIntervals isn't- // available, nullptr is also returned.- const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {- assert(hasAVLReg());- if (!LIS || getAVLVNInfo()->isPHIDef())- return nullptr;- auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);- assert(MI);- return MI;...[truncated] |
I realise it's a huge diff, but it is very mechanical: just moved functions to a different place. Also this is pretty hard to split, since if a function is moved all the other functions called by it have to be moved as well. Let me know if you think this should be broken down into more commits. |
This analysis can be reused in other places. Also, moved some utility functions into RISCVInstrInfo.
686e059
to485d3b5
Compare// | ||
//===----------------------------------------------------------------------===// | ||
/// \file | ||
/// This is the RISCV analysis of vector unit config. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others.Learn more.
RISC-V
@@ -0,0 +1,675 @@ | |||
//===- RISCVVConfigAnalysis --------------------------------------*- C++ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others.Learn more.
cpp files don't need `-*- C++ -*-`
@@ -0,0 +1,675 @@ | |||
//===- RISCVVConfigAnalysis --------------------------------------*- C++ | |||
//-*-===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others.Learn more.
This should not be line wrapped.
This analysis can be reused in other places.
Also, moved some utility functions into RISCVInstrInfo.