[llvm] [RISCV] Separate the analysis part of RISCVInsertVSETVLI. (PR #149574)
Mikhail Gudim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 18 12:50:53 PDT 2025
https://github.com/mgudim updated https://github.com/llvm/llvm-project/pull/149574
>From 485d3b5c527b692786628cf88603929c5317db9d Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Thu, 17 Jul 2025 06:06:46 -0700
Subject: [PATCH] [RISCV] Separate the analysis part of RISCVInsertVSETVLI.
This analysis can then be reused in other places.
Also move some utility functions into RISCVInstrInfo so they can be shared.
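For illustration, a consumer of the new analysis might look roughly like
this (a minimal sketch, not part of this patch; "MyRVVPass" is hypothetical,
while the analysis names match the ones added below):

  // Hypothetical machine pass reusing RISCVVConfigInfo (illustrative only).
  void MyRVVPass::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<RISCVVConfigWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool MyRVVPass::runOnMachineFunction(MachineFunction &MF) {
    RISCVVConfigInfo &VConfig =
        getAnalysis<RISCVVConfigWrapperPass>().getResult();
    if (!VConfig.haveVectorOp()) // No vector ops, nothing to inspect.
      return false;
    for (MachineBasicBlock &MBB : MF) {
      // VL/VTYPE state on entry to and on exit from each block.
      const VSETVLIInfo &Pred = VConfig.getInfo()[MBB.getNumber()].Pred;
      const VSETVLIInfo &Exit = VConfig.getInfo()[MBB.getNumber()].Exit;
      (void)Pred;
      (void)Exit;
    }
    return false;
  }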
---
llvm/lib/Target/RISCV/CMakeLists.txt | 1 +
llvm/lib/Target/RISCV/RISCV.h | 2 +
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 1285 +----------------
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 58 +
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 19 +
.../lib/Target/RISCV/RISCVVConfigAnalysis.cpp | 675 +++++++++
llvm/lib/Target/RISCV/RISCVVConfigAnalysis.h | 620 ++++++++
7 files changed, 1407 insertions(+), 1253 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVVConfigAnalysis.cpp
create mode 100644 llvm/lib/Target/RISCV/RISCVVConfigAnalysis.h
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 47329b2c2f4d2..427f69f7c5597 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -65,6 +65,7 @@ add_llvm_target(RISCVCodeGen
RISCVTargetMachine.cpp
RISCVTargetObjectFile.cpp
RISCVTargetTransformInfo.cpp
+ RISCVVConfigAnalysis.cpp
RISCVVectorMaskDAGMutation.cpp
RISCVVectorPeephole.cpp
RISCVVLOptimizer.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ae9410193efe1..f004b2c75a6d4 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -111,6 +111,8 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
FunctionPass *createRISCVPreLegalizerCombiner();
void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
+void initializeRISCVVConfigWrapperPassPass(PassRegistry &);
+
FunctionPass *createRISCVVLOptimizerPass();
void initializeRISCVVLOptimizerPass(PassRegistry &);
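Not visible in this diff: the new initializer presumably gets called from the
target's global pass initialization. A sketch, assuming the usual legacy-pass
registration pattern in RISCVTargetMachine.cpp:

  // Hypothetical call site; mirrors the existing initializeRISCV*Pass calls.
  extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
    PassRegistry *PR = PassRegistry::getPassRegistry();
    initializeRISCVVConfigWrapperPassPass(*PR);
    // ... remaining RISC-V pass initializers ...
  }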
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 90e1c47a71c89..c3e136171c5c1 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -9,23 +9,11 @@
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
-//
-// This pass consists of 3 phases:
-//
-// Phase 1 collects how each basic block affects VL/VTYPE.
-//
-// Phase 2 uses the information from phase 1 to do a data flow analysis to
-// propagate the VL/VTYPE changes through the function. This gives us the
-// VL/VTYPE at the start of each basic block.
-//
-// Phase 3 inserts VSETVLI instructions in each basic block. Information from
-// phase 2 is used to prevent inserting a VSETVLI before the first vector
-// instruction in the block if possible.
-//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
+#include "RISCVVConfigAnalysis.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
@@ -49,814 +37,18 @@ static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
namespace {
-/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
-/// This will return nullptr if the virtual register is an implicit_def or
-/// if LiveIntervals is not available.
-static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
- const LiveIntervals *LIS) {
- assert(Reg.isVirtual());
- if (!LIS)
- return nullptr;
- auto &LI = LIS->getInterval(Reg);
- SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
- return LI.getVNInfoBefore(SI);
-}
-
static unsigned getVLOpNum(const MachineInstr &MI) {
return RISCVII::getVLOpNum(MI.getDesc());
}
-static unsigned getSEWOpNum(const MachineInstr &MI) {
- return RISCVII::getSEWOpNum(MI.getDesc());
-}
-
-/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
-/// not a load or store which ignores SEW.
-static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
- switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
- default:
- return std::nullopt;
- case RISCV::VLE8_V:
- case RISCV::VLSE8_V:
- case RISCV::VSE8_V:
- case RISCV::VSSE8_V:
- return 8;
- case RISCV::VLE16_V:
- case RISCV::VLSE16_V:
- case RISCV::VSE16_V:
- case RISCV::VSSE16_V:
- return 16;
- case RISCV::VLE32_V:
- case RISCV::VLSE32_V:
- case RISCV::VSE32_V:
- case RISCV::VSSE32_V:
- return 32;
- case RISCV::VLE64_V:
- case RISCV::VLSE64_V:
- case RISCV::VSE64_V:
- case RISCV::VSSE64_V:
- return 64;
- }
-}
-
-/// Return true if this is an operation on mask registers. Note that
-/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
-static bool isMaskRegOp(const MachineInstr &MI) {
- if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
- return false;
- const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
- // A Log2SEW of 0 is an operation on mask registers only.
- return Log2SEW == 0;
-}
-
-/// Return true if the inactive elements in the result are entirely undefined.
-/// Note that this is different from "agnostic" as defined by the vector
-/// specification. Agnostic requires each lane to either be undisturbed, or
-/// take the value -1; no other value is allowed.
-static bool hasUndefinedPassthru(const MachineInstr &MI) {
-
- unsigned UseOpIdx;
- if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
- // If there is no passthrough operand, then the passthrough
- // lanes are undefined.
- return true;
-
- // All undefined passthrus should be $noreg: see
- // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
- const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
-}
-
-/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
-static bool isVectorCopy(const TargetRegisterInfo *TRI,
- const MachineInstr &MI) {
- return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
- RISCVRegisterInfo::isRVVRegClass(
- TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
-}
-
-/// Which subfields of VL or VTYPE have values we need to preserve?
-struct DemandedFields {
- // Some unknown property of VL is used. If demanded, must preserve entire
- // value.
- bool VLAny = false;
- // Only zero vs non-zero is used. If demanded, can change non-zero values.
- bool VLZeroness = false;
- // What properties of SEW we need to preserve.
- enum : uint8_t {
- SEWEqual = 3, // The exact value of SEW needs to be preserved.
- SEWGreaterThanOrEqualAndLessThan64 =
- 2, // SEW can be changed as long as it's greater
- // than or equal to the original value, but must be less
- // than 64.
- SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
- // than or equal to the original value.
- SEWNone = 0 // We don't need to preserve SEW at all.
- } SEW = SEWNone;
- enum : uint8_t {
- LMULEqual = 2, // The exact value of LMUL needs to be preserved.
- LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
- LMULNone = 0 // We don't need to preserve LMUL at all.
- } LMUL = LMULNone;
- bool SEWLMULRatio = false;
- bool TailPolicy = false;
- bool MaskPolicy = false;
- // If this is true, we demand that VTYPE is set to some legal state, i.e. that
- // vill is unset.
- bool VILL = false;
-
- // Return true if any part of VTYPE was used
- bool usedVTYPE() const {
- return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
- }
-
- // Return true if any property of VL was used
- bool usedVL() {
- return VLAny || VLZeroness;
- }
-
- // Mark all VTYPE subfields and properties as demanded
- void demandVTYPE() {
- SEW = SEWEqual;
- LMUL = LMULEqual;
- SEWLMULRatio = true;
- TailPolicy = true;
- MaskPolicy = true;
- VILL = true;
- }
-
- // Mark all VL properties as demanded
- void demandVL() {
- VLAny = true;
- VLZeroness = true;
- }
-
- static DemandedFields all() {
- DemandedFields DF;
- DF.demandVTYPE();
- DF.demandVL();
- return DF;
- }
-
- // Make this the result of demanding both the fields in this and B.
- void doUnion(const DemandedFields &B) {
- VLAny |= B.VLAny;
- VLZeroness |= B.VLZeroness;
- SEW = std::max(SEW, B.SEW);
- LMUL = std::max(LMUL, B.LMUL);
- SEWLMULRatio |= B.SEWLMULRatio;
- TailPolicy |= B.TailPolicy;
- MaskPolicy |= B.MaskPolicy;
- VILL |= B.VILL;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Support for debugging, callable in GDB: V->dump()
- LLVM_DUMP_METHOD void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
-
- /// Implement operator<<.
- void print(raw_ostream &OS) const {
- OS << "{";
- OS << "VLAny=" << VLAny << ", ";
- OS << "VLZeroness=" << VLZeroness << ", ";
- OS << "SEW=";
- switch (SEW) {
- case SEWEqual:
- OS << "SEWEqual";
- break;
- case SEWGreaterThanOrEqual:
- OS << "SEWGreaterThanOrEqual";
- break;
- case SEWGreaterThanOrEqualAndLessThan64:
- OS << "SEWGreaterThanOrEqualAndLessThan64";
- break;
- case SEWNone:
- OS << "SEWNone";
- break;
- };
- OS << ", ";
- OS << "LMUL=";
- switch (LMUL) {
- case LMULEqual:
- OS << "LMULEqual";
- break;
- case LMULLessThanOrEqualToM1:
- OS << "LMULLessThanOrEqualToM1";
- break;
- case LMULNone:
- OS << "LMULNone";
- break;
- };
- OS << ", ";
- OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
- OS << "TailPolicy=" << TailPolicy << ", ";
- OS << "MaskPolicy=" << MaskPolicy << ", ";
- OS << "VILL=" << VILL;
- OS << "}";
- }
-#endif
-};
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_ATTRIBUTE_USED
-inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
- DF.print(OS);
- return OS;
-}
-#endif
-
-static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {
- auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
- return Fractional || LMul == 1;
-}
-
-/// Return true if moving from CurVType to NewVType is
-/// indistinguishable from the perspective of an instruction (or set
-/// of instructions) which use only the Used subfields and properties.
-static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
- const DemandedFields &Used) {
- switch (Used.SEW) {
- case DemandedFields::SEWNone:
- break;
- case DemandedFields::SEWEqual:
- if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
- return false;
- break;
- case DemandedFields::SEWGreaterThanOrEqual:
- if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
- return false;
- break;
- case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
- if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
- RISCVVType::getSEW(NewVType) >= 64)
- return false;
- break;
- }
-
- switch (Used.LMUL) {
- case DemandedFields::LMULNone:
- break;
- case DemandedFields::LMULEqual:
- if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
- return false;
- break;
- case DemandedFields::LMULLessThanOrEqualToM1:
- if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
- return false;
- break;
- }
-
- if (Used.SEWLMULRatio) {
- auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
- RISCVVType::getVLMUL(CurVType));
- auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
- RISCVVType::getVLMUL(NewVType));
- if (Ratio1 != Ratio2)
- return false;
- }
-
- if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
- RISCVVType::isTailAgnostic(NewVType))
- return false;
- if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
- RISCVVType::isMaskAgnostic(NewVType))
- return false;
- return true;
-}
-
-/// Return the fields and properties demanded by the provided instruction.
-DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
- // This function works in coalesceVSETVLI too. We can still use the value of a
- // SEW, VL, or Policy operand even though it might not be the exact value in
- // the VL or VTYPE, since we only care about what the instruction originally
- // demanded.
-
- // Most instructions don't use any of these subfields.
- DemandedFields Res;
- // Start conservative if registers are used
- if (MI.isCall() || MI.isInlineAsm() ||
- MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
- Res.demandVL();
- if (MI.isCall() || MI.isInlineAsm() ||
- MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
- Res.demandVTYPE();
- // Start conservative on the unlowered form too
- uint64_t TSFlags = MI.getDesc().TSFlags;
- if (RISCVII::hasSEWOp(TSFlags)) {
- Res.demandVTYPE();
- if (RISCVII::hasVLOp(TSFlags))
- if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
- !VLOp.isReg() || !VLOp.isUndef())
- Res.demandVL();
-
- // Behavior is independent of mask policy.
- if (!RISCVII::usesMaskPolicy(TSFlags))
- Res.MaskPolicy = false;
- }
-
- // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
- // They instead demand the ratio of the two which is used in computing
- // EMUL, but which allows us the flexibility to change SEW and LMUL
- // provided we don't change the ratio.
- // Note: We assume that the instruction's initial SEW is the EEW encoded
- // in the opcode. This is asserted when constructing the VSETVLIInfo.
- if (getEEWForLoadStore(MI)) {
- Res.SEW = DemandedFields::SEWNone;
- Res.LMUL = DemandedFields::LMULNone;
- }
-
- // Store instructions don't use the policy fields.
- if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
- Res.TailPolicy = false;
- Res.MaskPolicy = false;
- }
-
- // If this is a mask reg operation, it only cares about VLMAX.
- // TODO: Possible extensions to this logic
- // * Probably ok if available VLMax is larger than demanded
- // * The policy bits can probably be ignored.
- if (isMaskRegOp(MI)) {
- Res.SEW = DemandedFields::SEWNone;
- Res.LMUL = DemandedFields::LMULNone;
- }
-
- // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
- if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
- Res.LMUL = DemandedFields::LMULNone;
- Res.SEWLMULRatio = false;
- Res.VLAny = false;
- // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
- // need to preserve any other bits and are thus compatible with any larger,
- // etype and can disregard policy bits. Warning: It's tempting to try doing
- // this for any tail agnostic operation, but we can't as TA requires
- // tail lanes to either be the original value or -1. We are writing
- // unknown bits to the lanes here.
- if (hasUndefinedPassthru(MI)) {
- if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
- !ST->hasVInstructionsF64())
- Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
- else
- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
- Res.TailPolicy = false;
- }
- }
-
- // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.
- if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
- assert(!RISCVII::hasVLOp(TSFlags));
- Res.LMUL = DemandedFields::LMULNone;
- Res.SEWLMULRatio = false;
- Res.TailPolicy = false;
- Res.MaskPolicy = false;
- }
-
- if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
- const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
- // A slidedown/slideup with an *undefined* passthru can freely clobber
- // elements not copied from the source vector (e.g. masked off, tail, or
- // slideup's prefix). Notes:
- // * We can't modify SEW here since the slide amount is in units of SEW.
- // * VL=1 is special only because we have existing support for zero vs
- // non-zero VL. We could generalize this if we had a VL > C predicate.
- // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
- // * As above, this is only legal for tail "undefined" not "agnostic".
- // * We avoid increasing vl if the subtarget has +vl-dependent-latency
- if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
- VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
- !ST->hasVLDependentLatency()) {
- Res.VLAny = false;
- Res.VLZeroness = true;
- Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
- Res.TailPolicy = false;
- }
-
- // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
- // semantically the same as vmv.s.x. This is particularly useful since we
- // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
- // in its place. Since a splat is non-constant time in LMUL, we do need to be
- // careful to not increase the number of active vector registers (unlike for
- // vmv.s.x.)
- if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
- VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
- !ST->hasVLDependentLatency()) {
- Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
- Res.SEWLMULRatio = false;
- Res.VLAny = false;
- if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
- !ST->hasVInstructionsF64())
- Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
- else
- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
- Res.TailPolicy = false;
- }
- }
-
- // In §32.16.6, whole vector register moves have a dependency on SEW. At the
- // MIR level though we don't encode the element type, and it gives the same
- // result whatever the SEW may be.
- //
- // However it does need valid SEW, i.e. vill must be cleared. The entry to a
- // function, calls and inline assembly may all set it, so make sure we clear
- // it for whole register copies. Do this by leaving VILL demanded.
- if (isVectorCopy(ST->getRegisterInfo(), MI)) {
- Res.LMUL = DemandedFields::LMULNone;
- Res.SEW = DemandedFields::SEWNone;
- Res.SEWLMULRatio = false;
- Res.TailPolicy = false;
- Res.MaskPolicy = false;
- }
-
- if (RISCVInstrInfo::isVExtractInstr(MI)) {
- assert(!RISCVII::hasVLOp(TSFlags));
- // TODO: LMUL can be any larger value (without cost)
- Res.TailPolicy = false;
- }
-
- return Res;
-}
-
-/// Defines the abstract state with which the forward dataflow models the
-/// values of the VL and VTYPE registers after insertion.
-class VSETVLIInfo {
- struct AVLDef {
- // Every AVLDef should have a VNInfo, unless we're running without
- // LiveIntervals in which case this will be nullptr.
- const VNInfo *ValNo;
- Register DefReg;
- };
- union {
- AVLDef AVLRegDef;
- unsigned AVLImm;
- };
-
- enum : uint8_t {
- Uninitialized,
- AVLIsReg,
- AVLIsImm,
- AVLIsVLMAX,
- Unknown, // AVL and VTYPE are fully unknown
- } State = Uninitialized;
-
- // Fields from VTYPE.
- RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
- uint8_t SEW = 0;
- uint8_t TailAgnostic : 1;
- uint8_t MaskAgnostic : 1;
- uint8_t SEWLMULRatioOnly : 1;
-
-public:
- VSETVLIInfo()
- : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
- SEWLMULRatioOnly(false) {}
-
- static VSETVLIInfo getUnknown() {
- VSETVLIInfo Info;
- Info.setUnknown();
- return Info;
- }
-
- bool isValid() const { return State != Uninitialized; }
- void setUnknown() { State = Unknown; }
- bool isUnknown() const { return State == Unknown; }
-
- void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
- assert(AVLReg.isVirtual());
- AVLRegDef.ValNo = VNInfo;
- AVLRegDef.DefReg = AVLReg;
- State = AVLIsReg;
- }
-
- void setAVLImm(unsigned Imm) {
- AVLImm = Imm;
- State = AVLIsImm;
- }
-
- void setAVLVLMAX() { State = AVLIsVLMAX; }
-
- bool hasAVLImm() const { return State == AVLIsImm; }
- bool hasAVLReg() const { return State == AVLIsReg; }
- bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
- Register getAVLReg() const {
- assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
- return AVLRegDef.DefReg;
- }
- unsigned getAVLImm() const {
- assert(hasAVLImm());
- return AVLImm;
- }
- const VNInfo *getAVLVNInfo() const {
- assert(hasAVLReg());
- return AVLRegDef.ValNo;
- }
- // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
- // a PHI node. In that case getAVLVNInfo()->def will point to the block
- // boundary slot and this will return nullptr. If LiveIntervals isn't
- // available, nullptr is also returned.
- const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
- assert(hasAVLReg());
- if (!LIS || getAVLVNInfo()->isPHIDef())
- return nullptr;
- auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
- assert(MI);
- return MI;
- }
-
- void setAVL(const VSETVLIInfo &Info) {
- assert(Info.isValid());
- if (Info.isUnknown())
- setUnknown();
- else if (Info.hasAVLReg())
- setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
- else if (Info.hasAVLVLMAX())
- setAVLVLMAX();
- else {
- assert(Info.hasAVLImm());
- setAVLImm(Info.getAVLImm());
- }
- }
-
- unsigned getSEW() const { return SEW; }
- RISCVVType::VLMUL getVLMUL() const { return VLMul; }
- bool getTailAgnostic() const { return TailAgnostic; }
- bool getMaskAgnostic() const { return MaskAgnostic; }
-
- bool hasNonZeroAVL(const LiveIntervals *LIS) const {
- if (hasAVLImm())
- return getAVLImm() > 0;
- if (hasAVLReg()) {
- if (auto *DefMI = getAVLDefMI(LIS))
- return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI);
- }
- if (hasAVLVLMAX())
- return true;
- return false;
- }
-
- bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
- const LiveIntervals *LIS) const {
- if (hasSameAVL(Other))
- return true;
- return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
- }
-
- bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
- if (hasAVLReg() && Other.hasAVLReg()) {
- assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
- "we either have intervals or we don't");
- if (!getAVLVNInfo())
- return getAVLReg() == Other.getAVLReg();
- return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
- getAVLReg() == Other.getAVLReg();
- }
-
- if (hasAVLImm() && Other.hasAVLImm())
- return getAVLImm() == Other.getAVLImm();
-
- if (hasAVLVLMAX())
- return Other.hasAVLVLMAX() && hasSameVLMAX(Other);
-
- return false;
- }
-
- // Return true if the two lattice values are guaranteed to have
- // the same AVL value at runtime.
- bool hasSameAVL(const VSETVLIInfo &Other) const {
- // Without LiveIntervals, we don't know which instruction defines a
- // register. Since a register may be redefined, this means all AVLIsReg
- // states must be treated as possibly distinct.
- if (hasAVLReg() && Other.hasAVLReg()) {
- assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
- "we either have intervals or we don't");
- if (!getAVLVNInfo())
- return false;
- }
- return hasSameAVLLatticeValue(Other);
- }
-
- void setVTYPE(unsigned VType) {
- assert(isValid() && !isUnknown() &&
- "Can't set VTYPE for uninitialized or unknown");
- VLMul = RISCVVType::getVLMUL(VType);
- SEW = RISCVVType::getSEW(VType);
- TailAgnostic = RISCVVType::isTailAgnostic(VType);
- MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
- }
- void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
- assert(isValid() && !isUnknown() &&
- "Can't set VTYPE for uninitialized or unknown");
- VLMul = L;
- SEW = S;
- TailAgnostic = TA;
- MaskAgnostic = MA;
- }
-
- void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
-
- unsigned encodeVTYPE() const {
- assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
- "Can't encode VTYPE for uninitialized or unknown");
- return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
- }
-
- bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
-
- bool hasSameVTYPE(const VSETVLIInfo &Other) const {
- assert(isValid() && Other.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- assert(!isUnknown() && !Other.isUnknown() &&
- "Can't compare VTYPE in unknown state");
- assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
- "Can't compare when only LMUL/SEW ratio is valid.");
- return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
- std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
- Other.MaskAgnostic);
- }
-
- unsigned getSEWLMULRatio() const {
- assert(isValid() && !isUnknown() &&
- "Can't use VTYPE for uninitialized or unknown");
- return RISCVVType::getSEWLMULRatio(SEW, VLMul);
- }
-
- // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
- // Note that having the same VLMAX ensures that both share the same
- // function from AVL to VL; that is, they must produce the same VL value
- // for any given AVL value.
- bool hasSameVLMAX(const VSETVLIInfo &Other) const {
- assert(isValid() && Other.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- assert(!isUnknown() && !Other.isUnknown() &&
- "Can't compare VTYPE in unknown state");
- return getSEWLMULRatio() == Other.getSEWLMULRatio();
- }
-
- bool hasCompatibleVTYPE(const DemandedFields &Used,
- const VSETVLIInfo &Require) const {
- return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
- }
-
- // Determine whether the vector instruction's requirements represented by
- // Require are compatible with the previous vsetvli instruction represented
- // by this. MI is the instruction whose requirements we're considering.
- bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
- const LiveIntervals *LIS) const {
- assert(isValid() && Require.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- // Nothing is compatible with Unknown.
- if (isUnknown() || Require.isUnknown())
- return false;
-
- // If only our VLMAX ratio is valid, then this isn't compatible.
- if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
- return false;
-
- if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
- return false;
-
- if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
- return false;
-
- return hasCompatibleVTYPE(Used, Require);
- }
-
- bool operator==(const VSETVLIInfo &Other) const {
- // Uninitialized is only equal to another Uninitialized.
- if (!isValid())
- return !Other.isValid();
- if (!Other.isValid())
- return !isValid();
-
- // Unknown is only equal to another Unknown.
- if (isUnknown())
- return Other.isUnknown();
- if (Other.isUnknown())
- return isUnknown();
-
- if (!hasSameAVLLatticeValue(Other))
- return false;
-
- // If the SEWLMULRatioOnly bits are different, then they aren't equal.
- if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
- return false;
-
- // If only the VLMAX is valid, check that it is the same.
- if (SEWLMULRatioOnly)
- return hasSameVLMAX(Other);
-
- // If the full VTYPE is valid, check that it is the same.
- return hasSameVTYPE(Other);
- }
-
- bool operator!=(const VSETVLIInfo &Other) const {
- return !(*this == Other);
- }
-
- // Calculate the VSETVLIInfo visible to a block assuming this and Other are
- // both predecessors.
- VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
- // If the new value isn't valid, ignore it.
- if (!Other.isValid())
- return *this;
-
- // If this value isn't valid, this must be the first predecessor, use it.
- if (!isValid())
- return Other;
-
- // If either is unknown, the result is unknown.
- if (isUnknown() || Other.isUnknown())
- return VSETVLIInfo::getUnknown();
-
- // If we have an exact match, return this.
- if (*this == Other)
- return *this;
-
- // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
- // return an SEW/LMUL ratio only value.
- if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
- VSETVLIInfo MergeInfo = *this;
- MergeInfo.SEWLMULRatioOnly = true;
- return MergeInfo;
- }
-
- // Otherwise the result is unknown.
- return VSETVLIInfo::getUnknown();
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Support for debugging, callable in GDB: V->dump()
- LLVM_DUMP_METHOD void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
-
- /// Implement operator<<.
- /// @{
- void print(raw_ostream &OS) const {
- OS << "{";
- if (!isValid())
- OS << "Uninitialized";
- if (isUnknown())
- OS << "unknown";
- if (hasAVLReg())
- OS << "AVLReg=" << llvm::printReg(getAVLReg());
- if (hasAVLImm())
- OS << "AVLImm=" << (unsigned)AVLImm;
- if (hasAVLVLMAX())
- OS << "AVLVLMAX";
- OS << ", ";
-
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) = decodeVLMUL(VLMul);
-
- OS << "VLMul=";
- if (Fractional)
- OS << "mf";
- else
- OS << "m";
- OS << LMul << ", "
- << "SEW=e" << (unsigned)SEW << ", "
- << "TailAgnostic=" << (bool)TailAgnostic << ", "
- << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
- << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
- }
-#endif
-};
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_ATTRIBUTE_USED
-inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
- V.print(OS);
- return OS;
-}
-#endif
-
-struct BlockData {
- // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
- // block. Calculated in Phase 2.
- VSETVLIInfo Exit;
-
- // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
- // blocks. Calculated in Phase 2, and used by Phase 3.
- VSETVLIInfo Pred;
-
- // Keeps track of whether the block is already in the queue.
- bool InQueue = false;
-
- BlockData() = default;
-};
-
class RISCVInsertVSETVLI : public MachineFunctionPass {
+ RISCVVConfigInfo *VConfig;
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
// Possibly null!
LiveIntervals *LIS;
- std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
public:
@@ -873,6 +65,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
AU.addPreserved<SlotIndexesWrapperPass>();
AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
AU.addPreserved<LiveStacksWrapperLegacy>();
+ AU.addRequired<RISCVVConfigWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -880,30 +73,17 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
private:
- bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
- const VSETVLIInfo &CurInfo) const;
bool needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const;
void insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
- void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
- void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
- bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
- VSETVLIInfo &Info) const;
- void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
- bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
- const DemandedFields &Used) const;
void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
-
- VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
- VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
- void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};
} // end anonymous namespace
@@ -911,8 +91,11 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;
-INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
- false, false)
+INITIALIZE_PASS_BEGIN(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(RISCVVConfigWrapperPass)
+INITIALIZE_PASS_END(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
+ false, false)
// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
@@ -922,132 +105,6 @@ INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
-void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
- if (!Info.hasAVLReg())
- return;
- const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
- if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
- return;
- VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
- if (!DefInstrInfo.hasSameVLMAX(Info))
- return;
- Info.setAVL(DefInstrInfo);
-}
-
-// Return a VSETVLIInfo representing the changes made by this VSETVLI or
-// VSETIVLI instruction.
-VSETVLIInfo
-RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
- VSETVLIInfo NewInfo;
- if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- NewInfo.setAVLImm(MI.getOperand(1).getImm());
- } else {
- assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETVLIX0);
- if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
- NewInfo.setAVLVLMAX();
- else if (MI.getOperand(1).isUndef())
- // Otherwise use an AVL of 1 to avoid depending on previous vl.
- NewInfo.setAVLImm(1);
- else {
- Register AVLReg = MI.getOperand(1).getReg();
- VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
- NewInfo.setAVLRegDef(VNI, AVLReg);
- }
- }
- NewInfo.setVTYPE(MI.getOperand(2).getImm());
-
- forwardVSETVLIAVL(NewInfo);
-
- return NewInfo;
-}
-
-static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
- RISCVVType::VLMUL VLMul) {
- auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
- if (Fractional)
- VLEN = VLEN / LMul;
- else
- VLEN = VLEN * LMul;
- return VLEN/SEW;
-}
-
-VSETVLIInfo
-RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
- VSETVLIInfo InstrInfo;
- const uint64_t TSFlags = MI.getDesc().TSFlags;
-
- bool TailAgnostic = true;
- bool MaskAgnostic = true;
- if (!hasUndefinedPassthru(MI)) {
- // Start with undisturbed.
- TailAgnostic = false;
- MaskAgnostic = false;
-
- // If there is a policy operand, use it.
- if (RISCVII::hasVecPolicyOp(TSFlags)) {
- const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
- uint64_t Policy = Op.getImm();
- assert(Policy <=
- (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
- "Invalid Policy Value");
- TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
- MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
- }
-
- if (!RISCVII::usesMaskPolicy(TSFlags))
- MaskAgnostic = true;
- }
-
- RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
-
- unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
- // A Log2SEW of 0 is an operation on mask registers only.
- unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
- assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
-
- if (RISCVII::hasVLOp(TSFlags)) {
- const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
- if (VLOp.isImm()) {
- int64_t Imm = VLOp.getImm();
- // Convert the VLMax sentinel to the X0 register.
- if (Imm == RISCV::VLMaxSentinel) {
- // If we know the exact VLEN, see if we can use the constant encoding
- // for the VLMAX instead. This reduces register pressure slightly.
- const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
- if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
- InstrInfo.setAVLImm(VLMAX);
- else
- InstrInfo.setAVLVLMAX();
- }
- else
- InstrInfo.setAVLImm(Imm);
- } else if (VLOp.isUndef()) {
- // Otherwise use an AVL of 1 to avoid depending on previous vl.
- InstrInfo.setAVLImm(1);
- } else {
- VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
- InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
- }
- } else {
- assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
- RISCVInstrInfo::isVExtractInstr(MI));
- // Pick a random value for state tracking purposes, will be ignored via
- // the demanded fields mechanism
- InstrInfo.setAVLImm(1);
- }
-#ifndef NDEBUG
- if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
- assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
- }
-#endif
- InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
-
- forwardVSETVLIAVL(InstrInfo);
-
- return InstrInfo;
-}
-
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
@@ -1073,7 +130,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) {
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+ VSETVLIInfo DefInfo = VConfig->getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
auto MI =
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
@@ -1150,208 +207,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
}
}
-/// Return true if a VSETVLI is required to transition from CurInfo to Require
-/// given a set of DemandedFields \p Used.
-bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
- const VSETVLIInfo &Require,
- const VSETVLIInfo &CurInfo) const {
- if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
- return true;
-
- if (CurInfo.isCompatible(Used, Require, LIS))
- return false;
-
- return true;
-}
-
-// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
-// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
-// places.
-static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
- const VSETVLIInfo &NewInfo,
- DemandedFields &Demanded) {
- VSETVLIInfo Info = NewInfo;
-
- if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
- !PrevInfo.isUnknown()) {
- if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
- PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
- Info.setVLMul(*NewVLMul);
- Demanded.LMUL = DemandedFields::LMULEqual;
- }
-
- return Info;
-}
-
-// Given an incoming state reaching MI, minimally modifies that state so that it
-// is compatible with MI. The resulting state is guaranteed to be semantically
-// legal for MI, but may not be the state requested by MI.
-void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
- const MachineInstr &MI) const {
- if (isVectorCopy(ST->getRegisterInfo(), MI) &&
- (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
- // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
- // be coalesced into another vsetvli since we won't demand any fields.
- VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
- NewInfo.setAVLImm(1);
- NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
- Info = NewInfo;
- return;
- }
-
- if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
- return;
-
- DemandedFields Demanded = getDemanded(MI, ST);
-
- const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
- assert(NewInfo.isValid() && !NewInfo.isUnknown());
- if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
- return;
-
- const VSETVLIInfo PrevInfo = Info;
- if (!Info.isValid() || Info.isUnknown())
- Info = NewInfo;
-
- const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
-
- // If MI only demands that VL has the same zeroness, we only need to set the
- // AVL if the zeroness differs. This removes a vsetvli entirely if the types
- // match, or allows use of the cheaper AVL-preserving variant if VLMAX
- // doesn't change. If VLMAX might change, we couldn't use the
- // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
- // extending the live range of an AVL register operand.
- // TODO: We can probably relax this for immediates.
- bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
- IncomingInfo.hasSameVLMAX(PrevInfo);
- if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
- Info.setAVL(IncomingInfo);
-
- Info.setVTYPE(
- ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
- .getVLMUL(),
- ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
- // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
- // if needed.
- (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
- IncomingInfo.getTailAgnostic(),
- (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
- IncomingInfo.getMaskAgnostic());
-
- // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
- // the AVL.
- if (Info.hasSEWLMULRatioOnly()) {
- VSETVLIInfo RatiolessInfo = IncomingInfo;
- RatiolessInfo.setAVL(Info);
- Info = RatiolessInfo;
- }
-}
-
-// Given a state with which we evaluated MI (see transferBefore above for why
-// this might be different from the state MI requested), modify the state to
-// reflect the changes MI might make.
-void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
- const MachineInstr &MI) const {
- if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
- Info = getInfoForVSETVLI(MI);
- return;
- }
-
- if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
- // Update AVL to vl-output of the fault first load.
- assert(MI.getOperand(1).getReg().isVirtual());
- if (LIS) {
- auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
- SlotIndex SI =
- LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
- VNInfo *VNI = LI.getVNInfoAt(SI);
- Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
- } else
- Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
- return;
- }
-
- // If this is something that updates VL/VTYPE that we don't know about, set
- // the state to unknown.
- if (MI.isCall() || MI.isInlineAsm() ||
- MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
- MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
- Info = VSETVLIInfo::getUnknown();
-}
-
-bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
- VSETVLIInfo &Info) const {
- bool HadVectorOp = false;
-
- Info = BlockInfo[MBB.getNumber()].Pred;
- for (const MachineInstr &MI : MBB) {
- transferBefore(Info, MI);
-
- if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
- RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
- isVectorCopy(ST->getRegisterInfo(), MI))
- HadVectorOp = true;
-
- transferAfter(Info, MI);
- }
-
- return HadVectorOp;
-}
-
-void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
-
- BlockData &BBInfo = BlockInfo[MBB.getNumber()];
-
- BBInfo.InQueue = false;
-
- // Start with the previous entry so that we keep the most conservative state
- // we have ever found.
- VSETVLIInfo InInfo = BBInfo.Pred;
- if (MBB.pred_empty()) {
- // There are no predecessors, so use the default starting status.
- InInfo.setUnknown();
- } else {
- for (MachineBasicBlock *P : MBB.predecessors())
- InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
- }
-
- // If we don't have any valid predecessor value, wait until we do.
- if (!InInfo.isValid())
- return;
-
- // If no change, no need to rerun block
- if (InInfo == BBInfo.Pred)
- return;
-
- BBInfo.Pred = InInfo;
- LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
- << " changed to " << BBInfo.Pred << "\n");
-
- // Note: It's tempting to cache the state changes here, but due to the
- // compatibility checks performed, a block's output state can change based on
- // the input state. To cache, we'd have to add logic for finding
- // never-compatible state changes.
- VSETVLIInfo TmpStatus;
- computeVLVTYPEChanges(MBB, TmpStatus);
-
- // If the new exit value matches the old exit value, we don't need to revisit
- // any blocks.
- if (BBInfo.Exit == TmpStatus)
- return;
-
- BBInfo.Exit = TmpStatus;
- LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
- << " changed to " << BBInfo.Exit << "\n");
-
- // Add the successors to the work list so we can propagate the changed exit
- // status.
- for (MachineBasicBlock *S : MBB.successors())
- if (!BlockInfo[S->getNumber()].InQueue) {
- BlockInfo[S->getNumber()].InQueue = true;
- WorkList.push(S);
- }
-}
-
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
@@ -1371,7 +226,7 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const LiveRange &LR = LIS->getInterval(Require.getAVLReg());
for (auto *PBB : MBB.predecessors()) {
- const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
+ const VSETVLIInfo &PBBExit = VConfig->getInfo()[PBB->getNumber()].Exit;
  // We need the PHI input to be the output of a VSET(I)VLI.
const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
@@ -1383,7 +238,7 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
// We found a VSET(I)VLI make sure it matches the output of the
// predecessor block.
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+ VSETVLIInfo DefInfo = VConfig->getInfoForVSETVLI(*DefMI);
if (DefInfo != PBBExit)
return true;
@@ -1400,13 +255,13 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
}
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
- VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
+ VSETVLIInfo CurInfo = VConfig->getInfo()[MBB.getNumber()].Pred;
// Track whether the prefix of the block we've scanned is transparent
// (meaning has not yet changed the abstract state).
bool PrefixTransparent = true;
for (MachineInstr &MI : MBB) {
const VSETVLIInfo PrevInfo = CurInfo;
- transferBefore(CurInfo, MI);
+ VConfig->transferBefore(CurInfo, MI);
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
@@ -1420,7 +275,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
}
if (EnsureWholeVectorRegisterMoveValidVTYPE &&
- isVectorCopy(ST->getRegisterInfo(), MI)) {
+ RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI)) {
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
PrefixTransparent = false;
@@ -1492,10 +347,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
PrefixTransparent = false;
- transferAfter(CurInfo, MI);
+ VConfig->transferAfter(CurInfo, MI);
}
- const auto &Info = BlockInfo[MBB.getNumber()];
+ const auto &Info = VConfig->getInfo()[MBB.getNumber()];
if (CurInfo != Info.Exit) {
LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
@@ -1511,13 +366,13 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
- if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
+ if (!VConfig->getInfo()[MBB.getNumber()].Pred.isUnknown())
return;
MachineBasicBlock *UnavailablePred = nullptr;
VSETVLIInfo AvailableInfo;
for (MachineBasicBlock *P : MBB.predecessors()) {
- const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
+ const VSETVLIInfo &PredInfo = VConfig->getInfo()[P->getNumber()].Exit;
if (PredInfo.isUnknown()) {
if (UnavailablePred)
return;
@@ -1568,20 +423,20 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
// not make this change without reflowing later blocks as well.
// 2) If we don't actually remove a transition, inserting a vsetvli
// into the predecessor block would be correct, but unprofitable.
- VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
+ VSETVLIInfo OldInfo = VConfig->getInfo()[MBB.getNumber()].Pred;
VSETVLIInfo CurInfo = AvailableInfo;
int TransitionsRemoved = 0;
for (const MachineInstr &MI : MBB) {
const VSETVLIInfo LastInfo = CurInfo;
const VSETVLIInfo LastOldInfo = OldInfo;
- transferBefore(CurInfo, MI);
- transferBefore(OldInfo, MI);
+ VConfig->transferBefore(CurInfo, MI);
+ VConfig->transferBefore(OldInfo, MI);
if (CurInfo == LastInfo)
TransitionsRemoved++;
if (LastOldInfo == OldInfo)
TransitionsRemoved--;
- transferAfter(CurInfo, MI);
- transferAfter(OldInfo, MI);
+ VConfig->transferAfter(CurInfo, MI);
+ VConfig->transferAfter(OldInfo, MI);
if (CurInfo == OldInfo)
// Convergence. All transitions after this must match by construction.
break;
@@ -1593,12 +448,12 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
// Finally, update both data flow state and insert the actual vsetvli.
// Doing both keeps the code in sync with the dataflow results, which
// is critical for correctness of phase 3.
- auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
+ auto OldExit = VConfig->getInfo()[UnavailablePred->getNumber()].Exit;
LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
<< UnavailablePred->getName() << " with state "
<< AvailableInfo << "\n");
- BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
- BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
+ VConfig->getInfo()[UnavailablePred->getNumber()].Exit = AvailableInfo;
+ VConfig->getInfo()[MBB.getNumber()].Pred = AvailableInfo;
// Note there's an implicit assumption here that terminators never use
// or modify VL or VTYPE. Also, fallthrough will return end().
@@ -1608,44 +463,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
AvailableInfo, OldExit);
}
-// Return true if we can mutate PrevMI to match MI without changing any of the
-// fields which would be observed.
-bool RISCVInsertVSETVLI::canMutatePriorConfig(
- const MachineInstr &PrevMI, const MachineInstr &MI,
- const DemandedFields &Used) const {
- // If the VL values aren't equal, return false if either a) the former is
- // demanded, or b) we can't rewrite the former to be the later for
- // implementation reasons.
- if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
- if (Used.VLAny)
- return false;
-
- if (Used.VLZeroness) {
- if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
- return false;
- if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
- LIS))
- return false;
- }
-
- auto &AVL = MI.getOperand(1);
-
- // If the AVL is a register, we need to make sure its definition is the same
- // at PrevMI as it was at MI.
- if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
- VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
- VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
- if (!VNI || !PrevVNI || VNI != PrevVNI)
- return false;
- }
- }
-
- assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
- auto PriorVType = PrevMI.getOperand(2).getImm();
- auto VType = MI.getOperand(2).getImm();
- return areCompatibleVTYPEs(PriorVType, VType, Used);
-}
-
void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
@@ -1673,7 +490,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
- Used.doUnion(getDemanded(MI, ST));
+ Used.doUnion(RISCVVConfigInfo::getDemanded(MI, ST));
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
@@ -1695,7 +512,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
continue;
}
- if (canMutatePriorConfig(MI, *NextMI, Used)) {
+ if (VConfig->canMutatePriorConfig(MI, *NextMI, Used)) {
if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) {
Register DefReg = NextMI->getOperand(0).getReg();
@@ -1743,7 +560,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
}
}
NextMI = &MI;
- Used = getDemanded(MI, ST);
+ Used = RISCVVConfigInfo::getDemanded(MI, ST);
}
// Loop over the dead AVL values, and delete them now. This has
@@ -1796,51 +613,14 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+ VConfig = &getAnalysis<RISCVVConfigWrapperPass>().getResult();
- assert(BlockInfo.empty() && "Expect empty block infos");
- BlockInfo.resize(MF.getNumBlockIDs());
-
- bool HaveVectorOp = false;
-
- // Phase 1 - determine how VL/VTYPE are affected by each block.
- for (const MachineBasicBlock &MBB : MF) {
- VSETVLIInfo TmpStatus;
- HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
- // Initial exit state is whatever change we found in the block.
- BlockData &BBInfo = BlockInfo[MBB.getNumber()];
- BBInfo.Exit = TmpStatus;
- LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
- << " is " << BBInfo.Exit << "\n");
-
- }
-
- // If we didn't find any instructions that need VSETVLI, we're done.
- if (!HaveVectorOp) {
- BlockInfo.clear();
+ if (!VConfig->haveVectorOp())
return false;
- }
-
- // Phase 2 - determine the exit VL/VTYPE from each block. We add all
- // blocks to the list here, but will also add any that need to be revisited
- // during Phase 2 processing.
- for (const MachineBasicBlock &MBB : MF) {
- WorkList.push(&MBB);
- BlockInfo[MBB.getNumber()].InQueue = true;
- }
- while (!WorkList.empty()) {
- const MachineBasicBlock &MBB = *WorkList.front();
- WorkList.pop();
- computeIncomingVLVTYPE(MBB);
- }
-
// Perform partial redundancy elimination of vsetvli transitions.
for (MachineBasicBlock &MBB : MF)
doPRE(MBB);
- // Phase 3 - add any vsetvli instructions needed in the block. Use the
- // Phase 2 information to avoid adding vsetvlis before the first vector
- // instruction in the block if the VL/VTYPE is satisfied by its
- // predecessors.
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
@@ -1861,8 +641,7 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
insertReadVL(MBB);
- BlockInfo.clear();
- return HaveVectorOp;
+ return true;
}
/// Returns an instance of the Insert VSETVLI pass.
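For reviewers, the analysis surface this file now depends on, reconstructed
from the call sites above (an illustrative summary only; the authoritative
declarations are in RISCVVConfigAnalysis.h below):

  class RISCVVConfigInfo {
  public:
    // Per-block Pred/Exit VSETVLIInfo, indexed by MBB number. Returned by
    // mutable reference so doPRE can update the dataflow results in place.
    std::vector<BlockData> &getInfo();
    // Whether the function contains any instruction that needs a VSETVLI.
    bool haveVectorOp() const;
    VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
    void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
    void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
    bool canMutatePriorConfig(const MachineInstr &PrevMI,
                              const MachineInstr &MI,
                              const DemandedFields &Used) const;
    static DemandedFields getDemanded(const MachineInstr &MI,
                                      const RISCVSubtarget *ST);
  };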
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 64f9e3eb8d86f..4a9acdbb68183 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1464,6 +1464,64 @@ bool RISCVInstrInfo::isFromLoadImm(const MachineRegisterInfo &MRI,
return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
}
+bool RISCVInstrInfo::isVectorCopy(const TargetRegisterInfo *TRI,
+ const MachineInstr &MI) {
+ return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
+ RISCVRegisterInfo::isRVVRegClass(
+ TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
+}
+
+bool RISCVInstrInfo::isMaskRegOp(const MachineInstr &MI) {
+ if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
+ return false;
+ const unsigned Log2SEW =
+ MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ // A Log2SEW of 0 is an operation on mask registers only.
+ return Log2SEW == 0;
+}
+
+bool RISCVInstrInfo::hasUndefinedPassthru(const MachineInstr &MI) {
+
+ unsigned UseOpIdx;
+ if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
+ // If there is no passthrough operand, then the passthrough
+ // lanes are undefined.
+ return true;
+
+ // All undefined passthrus should be $noreg: see
+ // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
+ const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+}
+
+std::optional<unsigned>
+RISCVInstrInfo::getEEWForLoadStore(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return std::nullopt;
+ case RISCV::VLE8_V:
+ case RISCV::VLSE8_V:
+ case RISCV::VSE8_V:
+ case RISCV::VSSE8_V:
+ return 8;
+ case RISCV::VLE16_V:
+ case RISCV::VLSE16_V:
+ case RISCV::VSE16_V:
+ case RISCV::VSSE16_V:
+ return 16;
+ case RISCV::VLE32_V:
+ case RISCV::VLSE32_V:
+ case RISCV::VSE32_V:
+ case RISCV::VSSE32_V:
+ return 32;
+ case RISCV::VLE64_V:
+ case RISCV::VLSE64_V:
+ case RISCV::VSE64_V:
+ case RISCV::VSSE64_V:
+ return 64;
+ }
+}
+
bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
bool IsSigned = false;
bool IsEquality = false;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 785c8352d4a5e..35fa510a3d7a3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -321,6 +321,25 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
static bool isFromLoadImm(const MachineRegisterInfo &MRI,
const MachineOperand &Op, int64_t &Imm);
+ /// Return true if \p MI is a copy that will be lowered to one or more
+ /// vmvNr.vs.
+ static bool isVectorCopy(const TargetRegisterInfo *TRI,
+ const MachineInstr &MI);
+
+ /// Return true if this is an operation on mask registers. Note that
+ /// this includes both arithmetic/logical ops and load/store (vlm/vsm).
+ static bool isMaskRegOp(const MachineInstr &MI);
+
+ /// Return true if the inactive elements in the result are entirely undefined.
+ /// Note that this is different from "agnostic" as defined by the vector
+ /// specification. Agnostic requires each lane to either be undisturbed, or
+ /// take the value -1; no other value is allowed.
+ static bool hasUndefinedPassthru(const MachineInstr &MI);
+
+ /// Get the EEW for a load or store instruction. Return std::nullopt if MI is
+ /// not a load or store which ignores SEW.
+ static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI);
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/RISCV/RISCVVConfigAnalysis.cpp b/llvm/lib/Target/RISCV/RISCVVConfigAnalysis.cpp
new file mode 100644
index 0000000000000..3c1b6cf016622
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVVConfigAnalysis.cpp
@@ -0,0 +1,675 @@
+//===- RISCVVConfigAnalysis.cpp - RISC-V Vector Config Analysis -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RISC-V analysis of the vector unit
+/// configuration (VL/VTYPE).
+//===----------------------------------------------------------------------===//
+
+#include "RISCVVConfigAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-vconfig-analysis"
+
+static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {
+ auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
+ return Fractional || LMul == 1;
+}
+
+static unsigned getVLOpNum(const MachineInstr &MI) {
+ return RISCVII::getVLOpNum(MI.getDesc());
+}
+
+static unsigned getSEWOpNum(const MachineInstr &MI) {
+ return RISCVII::getSEWOpNum(MI.getDesc());
+}
+
+/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
+/// This will return nullptr if the virtual register is an implicit_def or
+/// if LiveIntervals is not available.
+static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
+ const LiveIntervals *LIS) {
+ assert(Reg.isVirtual());
+ if (!LIS)
+ return nullptr;
+ auto &LI = LIS->getInterval(Reg);
+ SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
+ return LI.getVNInfoBefore(SI);
+}
+
+bool RISCVVConfigInfo::areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
+ const DemandedFields &Used) {
+ switch (Used.SEW) {
+ case DemandedFields::SEWNone:
+ break;
+ case DemandedFields::SEWEqual:
+ if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqual:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
+ RISCVVType::getSEW(NewVType) >= 64)
+ return false;
+ break;
+ }
+
+ switch (Used.LMUL) {
+ case DemandedFields::LMULNone:
+ break;
+ case DemandedFields::LMULEqual:
+ if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
+ return false;
+ break;
+ case DemandedFields::LMULLessThanOrEqualToM1:
+ if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
+ return false;
+ break;
+ }
+
+ if (Used.SEWLMULRatio) {
+ auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
+ RISCVVType::getVLMUL(CurVType));
+ auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
+ RISCVVType::getVLMUL(NewVType));
+ if (Ratio1 != Ratio2)
+ return false;
+ }
+
+ if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
+ RISCVVType::isTailAgnostic(NewVType))
+ return false;
+ if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
+ RISCVVType::isMaskAgnostic(NewVType))
+ return false;
+ return true;
+}
+
+bool VSETVLIInfo::hasCompatibleVTYPE(const DemandedFields &Used,
+ const VSETVLIInfo &Require) const {
+ return RISCVVConfigInfo::areCompatibleVTYPEs(Require.encodeVTYPE(),
+ encodeVTYPE(), Used);
+}
+
+bool RISCVVConfigInfo::haveVectorOp() const { return HaveVectorOp; }
+
+/// Return the fields and properties demanded by the provided instruction.
+DemandedFields RISCVVConfigInfo::getDemanded(const MachineInstr &MI,
+ const RISCVSubtarget *ST) {
+  // This function is used by coalesceVSETVLI too. We can still use the value
+  // of a SEW, VL, or Policy operand even though it might not be the exact
+  // value in the VL or VTYPE, since we only care about what the instruction
+  // originally demanded.
+
+  // Most instructions don't use any of these subfields.
+ DemandedFields Res;
+ // Start conservative if registers are used
+ if (MI.isCall() || MI.isInlineAsm() ||
+ MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
+ Res.demandVL();
+ if (MI.isCall() || MI.isInlineAsm() ||
+ MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
+ Res.demandVTYPE();
+ // Start conservative on the unlowered form too
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVII::hasSEWOp(TSFlags)) {
+ Res.demandVTYPE();
+ if (RISCVII::hasVLOp(TSFlags))
+ if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ !VLOp.isReg() || !VLOp.isUndef())
+ Res.demandVL();
+
+ // Behavior is independent of mask policy.
+ if (!RISCVII::usesMaskPolicy(TSFlags))
+ Res.MaskPolicy = false;
+ }
+
+ // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
+ // They instead demand the ratio of the two which is used in computing
+ // EMUL, but which allows us the flexibility to change SEW and LMUL
+ // provided we don't change the ratio.
+  // Note: We assume that the instruction's initial SEW is the EEW encoded
+  // in the opcode. This is asserted when constructing the VSETVLIInfo.
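+  // For example (illustrative): a vle32.v that is legal under SEW=32/LMUL=m1
+  // (ratio 32) behaves identically under SEW=16/LMUL=mf2, which keeps the
+  // ratio while the EEW of 32 stays encoded in the opcode.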
+ if (RISCVInstrInfo::getEEWForLoadStore(MI)) {
+ Res.SEW = DemandedFields::SEWNone;
+ Res.LMUL = DemandedFields::LMULNone;
+ }
+
+ // Store instructions don't use the policy fields.
+ if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ // If this is a mask reg operation, it only cares about VLMAX.
+  // TODO: Possible extensions to this logic
+  // * Probably OK if the available VLMAX is larger than demanded
+  // * The policy bits can probably be ignored.
+ if (RISCVInstrInfo::isMaskRegOp(MI)) {
+ Res.SEW = DemandedFields::SEWNone;
+ Res.LMUL = DemandedFields::LMULNone;
+ }
+
+ // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
+ if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
+ Res.LMUL = DemandedFields::LMULNone;
+ Res.SEWLMULRatio = false;
+ Res.VLAny = false;
+    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
+    // need to preserve any other bits and are thus compatible with any
+    // larger etype, and can disregard policy bits. Warning: It's tempting to
+    // try doing this for any tail-agnostic operation, but we can't as TA
+    // requires tail lanes to either be the original value or -1. We are
+    // writing unknown bits to the lanes here.
+ if (RISCVInstrInfo::hasUndefinedPassthru(MI)) {
+ if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
+ !ST->hasVInstructionsF64())
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ Res.TailPolicy = false;
+ }
+ }
+
+  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
+ if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ Res.LMUL = DemandedFields::LMULNone;
+ Res.SEWLMULRatio = false;
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
+ const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ // A slidedown/slideup with an *undefined* passthru can freely clobber
+ // elements not copied from the source vector (e.g. masked off, tail, or
+ // slideup's prefix). Notes:
+ // * We can't modify SEW here since the slide amount is in units of SEW.
+ // * VL=1 is special only because we have existing support for zero vs
+ // non-zero VL. We could generalize this if we had a VL > C predicate.
+ // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
+ // * As above, this is only legal for tail "undefined" not "agnostic".
+ // * We avoid increasing vl if the subtarget has +vl-dependent-latency
+ if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
+ VLOp.getImm() == 1 && RISCVInstrInfo::hasUndefinedPassthru(MI) &&
+ !ST->hasVLDependentLatency()) {
+ Res.VLAny = false;
+ Res.VLZeroness = true;
+ Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
+ Res.TailPolicy = false;
+ }
+
+    // A tail-undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
+    // semantically the same as vmv.s.x. This is particularly useful since we
+    // don't have an immediate form of vmv.s.x, and thus frequently use
+    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we
+    // do need to be careful not to increase the number of active vector
+    // registers (unlike for vmv.s.x).
+ if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
+ VLOp.getImm() == 1 && RISCVInstrInfo::hasUndefinedPassthru(MI) &&
+ !ST->hasVLDependentLatency()) {
+ Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
+ Res.SEWLMULRatio = false;
+ Res.VLAny = false;
+ if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
+ !ST->hasVInstructionsF64())
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ Res.TailPolicy = false;
+ }
+ }
+
+  // In §32.16.6, whole vector register moves have a dependency on SEW. At
+  // the MIR level, though, we don't encode the element type, and the move
+  // gives the same result whatever the SEW may be.
+  //
+  // However, it does need a valid SEW, i.e. vill must be cleared. The entry
+  // to a function, calls and inline assembly may all set it, so make sure we
+  // clear it for whole register copies. Do this by leaving VILL demanded.
+ if (RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI)) {
+ Res.LMUL = DemandedFields::LMULNone;
+ Res.SEW = DemandedFields::SEWNone;
+ Res.SEWLMULRatio = false;
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ if (RISCVInstrInfo::isVExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ // TODO: LMUL can be any larger value (without cost)
+ Res.TailPolicy = false;
+ }
+
+ return Res;
+}
+
+// Given an incoming state reaching MI, minimally modifies that state so that it
+// is compatible with MI. The resulting state is guaranteed to be semantically
+// legal for MI, but may not be the state requested by MI.
+void RISCVVConfigInfo::transferBefore(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
+ if (RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI) &&
+ (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
+ // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
+ // be coalesced into another vsetvli since we won't demand any fields.
+ VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
+ NewInfo.setAVLImm(1);
+ NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
+ Info = NewInfo;
+ return;
+ }
+
+ if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
+ return;
+
+ DemandedFields Demanded = getDemanded(MI, ST);
+
+ const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
+ assert(NewInfo.isValid() && !NewInfo.isUnknown());
+ if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
+ return;
+
+ const VSETVLIInfo PrevInfo = Info;
+ if (!Info.isValid() || Info.isUnknown())
+ Info = NewInfo;
+
+ const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
+
+  // If MI only demands that VL has the same zeroness, we only need to set the
+  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
+  // match, or allows use of the cheaper AVL-preserving variant if VLMAX
+  // doesn't change. If VLMAX might change, we couldn't use the
+  // "vsetvli x0, x0, vtype" variant, so we avoid the transform to prevent
+  // extending the live range of an AVL register operand.
+  // TODO: We can probably relax this for immediates.
+ bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
+ IncomingInfo.hasSameVLMAX(PrevInfo);
+ if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
+ Info.setAVL(IncomingInfo);
+
+ Info.setVTYPE(
+ ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
+ .getVLMUL(),
+ ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
+ // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
+ // if needed.
+ (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
+ IncomingInfo.getTailAgnostic(),
+ (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
+ IncomingInfo.getMaskAgnostic());
+
+ // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
+ // the AVL.
+ if (Info.hasSEWLMULRatioOnly()) {
+ VSETVLIInfo RatiolessInfo = IncomingInfo;
+ RatiolessInfo.setAVL(Info);
+ Info = RatiolessInfo;
+ }
+}
+
+// Given a state with which we evaluated MI (see transferBefore above for why
+// this might be different from the state MI requested), modify the state to
+// reflect the changes MI might make.
+void RISCVVConfigInfo::transferAfter(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
+ if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
+ Info = getInfoForVSETVLI(MI);
+ return;
+ }
+
+  if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
+    // Update AVL to the vl output of the fault-only-first load.
+ assert(MI.getOperand(1).getReg().isVirtual());
+ if (LIS) {
+ auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ SlotIndex SI =
+ LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ VNInfo *VNI = LI.getVNInfoAt(SI);
+ Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
+ } else
+ Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
+ return;
+ }
+
+ // If this is something that updates VL/VTYPE that we don't know about, set
+ // the state to unknown.
+ if (MI.isCall() || MI.isInlineAsm() ||
+ MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
+ MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
+ Info = VSETVLIInfo::getUnknown();
+}
+
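+// For example (illustrative): with VLEN=256, SEW=16 and LMUL=m2,
+// VLMAX = (256 * 2) / 16 = 32; with fractional LMUL=mf2 instead,
+// VLMAX = (256 / 2) / 16 = 8.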
+unsigned RISCVVConfigInfo::computeVLMAX(unsigned VLEN, unsigned SEW,
+ RISCVVType::VLMUL VLMul) {
+ auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
+ if (Fractional)
+ VLEN = VLEN / LMul;
+ else
+ VLEN = VLEN * LMul;
+ return VLEN / SEW;
+}
+
+// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+// places.
+VSETVLIInfo RISCVVConfigInfo::adjustIncoming(const VSETVLIInfo &PrevInfo,
+ const VSETVLIInfo &NewInfo,
+ DemandedFields &Demanded) {
+ VSETVLIInfo Info = NewInfo;
+
+ if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
+ !PrevInfo.isUnknown()) {
+ if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
+ PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
+ Info.setVLMul(*NewVLMul);
+ Demanded.LMUL = DemandedFields::LMULEqual;
+ }
+
+ return Info;
+}
+
+bool RISCVVConfigInfo::needVSETVLI(const DemandedFields &Used,
+ const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) const {
+ if (!CurInfo.isValid() || CurInfo.isUnknown() ||
+ CurInfo.hasSEWLMULRatioOnly())
+ return true;
+
+ if (CurInfo.isCompatible(Used, Require, LIS))
+ return false;
+
+ return true;
+}
+
+VSETVLIInfo RISCVVConfigInfo::getInfoForVSETVLI(const MachineInstr &MI) const {
+ VSETVLIInfo NewInfo;
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
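+    // Note: an AVL operand of x0 together with a non-x0 destination register
+    // encodes "set VL to VLMAX" in the vsetvli encoding.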
+ if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
+ NewInfo.setAVLVLMAX();
+ else if (MI.getOperand(1).isUndef())
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
+ NewInfo.setAVLImm(1);
+ else {
+ Register AVLReg = MI.getOperand(1).getReg();
+ VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
+ NewInfo.setAVLRegDef(VNI, AVLReg);
+ }
+ }
+ NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+ forwardVSETVLIAVL(NewInfo);
+
+ return NewInfo;
+}
+
+bool RISCVVConfigInfo::canMutatePriorConfig(const MachineInstr &PrevMI,
+ const MachineInstr &MI,
+ const DemandedFields &Used) const {
+  // If the VL values aren't equal, return false if either a) the former is
+  // demanded, or b) we can't rewrite the former to be the latter for
+  // implementation reasons.
+ if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
+ if (Used.VLAny)
+ return false;
+
+ if (Used.VLZeroness) {
+ if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
+ return false;
+ if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
+ LIS))
+ return false;
+ }
+
+ auto &AVL = MI.getOperand(1);
+
+ // If the AVL is a register, we need to make sure its definition is the same
+ // at PrevMI as it was at MI.
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
+ VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
+ VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
+ if (!VNI || !PrevVNI || VNI != PrevVNI)
+ return false;
+ }
+ }
+
+ assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
+ auto PriorVType = PrevMI.getOperand(2).getImm();
+ auto VType = MI.getOperand(2).getImm();
+ return areCompatibleVTYPEs(PriorVType, VType, Used);
+}
+
+VSETVLIInfo
+RISCVVConfigInfo::computeInfoForInstr(const MachineInstr &MI) const {
+ VSETVLIInfo InstrInfo;
+ const uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ bool TailAgnostic = true;
+ bool MaskAgnostic = true;
+ if (!RISCVInstrInfo::hasUndefinedPassthru(MI)) {
+ // Start with undisturbed.
+ TailAgnostic = false;
+ MaskAgnostic = false;
+
+ // If there is a policy operand, use it.
+ if (RISCVII::hasVecPolicyOp(TSFlags)) {
+ const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ uint64_t Policy = Op.getImm();
+ assert(Policy <=
+ (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
+ "Invalid Policy Value");
+ TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
+ MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
+ }
+
+ if (!RISCVII::usesMaskPolicy(TSFlags))
+ MaskAgnostic = true;
+ }
+
+ RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+ unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
+ // A Log2SEW of 0 is an operation on mask registers only.
+ unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+ if (RISCVII::hasVLOp(TSFlags)) {
+ const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ if (VLOp.isImm()) {
+ int64_t Imm = VLOp.getImm();
+      // Convert the VLMax sentinel to the X0 register.
+ if (Imm == RISCV::VLMaxSentinel) {
+ // If we know the exact VLEN, see if we can use the constant encoding
+ // for the VLMAX instead. This reduces register pressure slightly.
+ const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
+ if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
+ InstrInfo.setAVLImm(VLMAX);
+ else
+ InstrInfo.setAVLVLMAX();
+ } else
+ InstrInfo.setAVLImm(Imm);
+ } else if (VLOp.isUndef()) {
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
+ InstrInfo.setAVLImm(1);
+ } else {
+ VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
+ InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
+ }
+ } else {
+ assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
+ RISCVInstrInfo::isVExtractInstr(MI));
+    // Pick an arbitrary value for state tracking purposes; it will be
+    // ignored via the demanded fields mechanism.
+ InstrInfo.setAVLImm(1);
+ }
+#ifndef NDEBUG
+ if (std::optional<unsigned> EEW = RISCVInstrInfo::getEEWForLoadStore(MI)) {
+ assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
+ }
+#endif
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+
+ forwardVSETVLIAVL(InstrInfo);
+
+ return InstrInfo;
+}
+
+bool RISCVVConfigInfo::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const {
+ bool HadVectorOp = false;
+
+ Info = BlockInfo[MBB.getNumber()].Pred;
+ for (const MachineInstr &MI : MBB) {
+ transferBefore(Info, MI);
+
+ if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
+ RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI))
+ HadVectorOp = true;
+
+ transferAfter(Info, MI);
+ }
+
+ return HadVectorOp;
+}
+
+void RISCVVConfigInfo::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
+ if (!Info.hasAVLReg())
+ return;
+ const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
+ if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
+ return;
+ VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
+ if (!DefInstrInfo.hasSameVLMAX(Info))
+ return;
+ Info.setAVL(DefInstrInfo);
+}
+
+void RISCVVConfigInfo::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+  BBInfo.InQueue = false;
+
+ // Start with the previous entry so that we keep the most conservative state
+ // we have ever found.
+ VSETVLIInfo InInfo = BBInfo.Pred;
+ if (MBB.pred_empty()) {
+ // There are no predecessors, so use the default starting status.
+ InInfo.setUnknown();
+ } else {
+ for (MachineBasicBlock *P : MBB.predecessors())
+ InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
+ }
+
+ // If we don't have any valid predecessor value, wait until we do.
+ if (!InInfo.isValid())
+ return;
+
+ // If no change, no need to rerun block
+ if (InInfo == BBInfo.Pred)
+ return;
+
+ BBInfo.Pred = InInfo;
+ LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.Pred << "\n");
+
+  // Note: It's tempting to cache the state changes here, but due to the
+  // compatibility checks performed, a block's output state can change based
+  // on the input state. To cache, we'd have to add logic for finding
+  // never-compatible state changes.
+ VSETVLIInfo TmpStatus;
+ computeVLVTYPEChanges(MBB, TmpStatus);
+
+ // If the new exit value matches the old exit value, we don't need to revisit
+ // any blocks.
+ if (BBInfo.Exit == TmpStatus)
+ return;
+
+ BBInfo.Exit = TmpStatus;
+ LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.Exit << "\n");
+
+ // Add the successors to the work list so we can propagate the changed exit
+ // status.
+ for (MachineBasicBlock *S : MBB.successors())
+ if (!BlockInfo[S->getNumber()].InQueue) {
+ BlockInfo[S->getNumber()].InQueue = true;
+ WorkList.push(S);
+ }
+}
+
+void RISCVVConfigInfo::compute(const MachineFunction &MF) {
+ assert(BlockInfo.empty() && "Expect empty block infos");
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ HaveVectorOp = false;
+
+  // Phase 1 - determine how VL/VTYPE are affected by each block.
+ for (const MachineBasicBlock &MBB : MF) {
+ VSETVLIInfo TmpStatus;
+ HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
+ // Initial exit state is whatever change we found in the block.
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+ BBInfo.Exit = TmpStatus;
+ LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
+ << " is " << BBInfo.Exit << "\n");
+ }
+
+ // If we didn't find any instructions that need VSETVLI, we're done.
+ if (!HaveVectorOp) {
+ BlockInfo.clear();
+ return;
+ }
+
+ // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+ // blocks to the list here, but will also add any that need to be revisited
+ // during Phase 2 processing.
+ for (const MachineBasicBlock &MBB : MF) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeIncomingVLVTYPE(MBB);
+ }
+}
+
+void RISCVVConfigInfo::clear() { BlockInfo.clear(); }
+
+char RISCVVConfigWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RISCVVConfigWrapperPass, DEBUG_TYPE,
+ "RISCV Vector Config Analysis", false, true)
+INITIALIZE_PASS_END(RISCVVConfigWrapperPass, DEBUG_TYPE,
+ "RISCV Vector Config Analysis", false, true)
+
+RISCVVConfigWrapperPass::RISCVVConfigWrapperPass() : MachineFunctionPass(ID) {}
+
+void RISCVVConfigWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RISCVVConfigWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+ LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+ Result = RISCVVConfigInfo(&MF.getSubtarget<RISCVSubtarget>(), LIS);
+ Result.compute(MF);
+ return false;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVVConfigAnalysis.h b/llvm/lib/Target/RISCV/RISCVVConfigAnalysis.h
new file mode 100644
index 0000000000000..c5f18602f7763
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVVConfigAnalysis.h
@@ -0,0 +1,620 @@
+//===- RISCVVConfigAnalysis.h - RISC-V Vector Config Analysis ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the RISC-V analysis of the vector unit
+/// configuration (VL/VTYPE).
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVVCONFIGANALYSIS_H
+#define LLVM_LIB_TARGET_RISCV_RISCVVCONFIGANALYSIS_H
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include <queue>
+#include <vector>
+
+namespace llvm {
+/// Which subfields of VL or VTYPE have values we need to preserve?
+struct DemandedFields {
+ // Some unknown property of VL is used. If demanded, must preserve entire
+ // value.
+ bool VLAny = false;
+ // Only zero vs non-zero is used. If demanded, can change non-zero values.
+ bool VLZeroness = false;
+ // What properties of SEW we need to preserve.
+ enum : uint8_t {
+ SEWEqual = 3, // The exact value of SEW needs to be preserved.
+ SEWGreaterThanOrEqualAndLessThan64 =
+ 2, // SEW can be changed as long as it's greater
+ // than or equal to the original value, but must be less
+ // than 64.
+ SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
+ // than or equal to the original value.
+ SEWNone = 0 // We don't need to preserve SEW at all.
+ } SEW = SEWNone;
+ enum : uint8_t {
+ LMULEqual = 2, // The exact value of LMUL needs to be preserved.
+ LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
+ LMULNone = 0 // We don't need to preserve LMUL at all.
+ } LMUL = LMULNone;
+ bool SEWLMULRatio = false;
+ bool TailPolicy = false;
+ bool MaskPolicy = false;
+ // If this is true, we demand that VTYPE is set to some legal state, i.e. that
+ // vill is unset.
+ bool VILL = false;
+
+ // Return true if any part of VTYPE was used
+ bool usedVTYPE() const {
+ return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
+ }
+
+ // Return true if any property of VL was used
+  bool usedVL() const { return VLAny || VLZeroness; }
+
+ // Mark all VTYPE subfields and properties as demanded
+ void demandVTYPE() {
+ SEW = SEWEqual;
+ LMUL = LMULEqual;
+ SEWLMULRatio = true;
+ TailPolicy = true;
+ MaskPolicy = true;
+ VILL = true;
+ }
+
+ // Mark all VL properties as demanded
+ void demandVL() {
+ VLAny = true;
+ VLZeroness = true;
+ }
+
+ static DemandedFields all() {
+ DemandedFields DF;
+ DF.demandVTYPE();
+ DF.demandVL();
+ return DF;
+ }
+
+ // Make this the result of demanding both the fields in this and B.
+ void doUnion(const DemandedFields &B) {
+ VLAny |= B.VLAny;
+ VLZeroness |= B.VLZeroness;
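+    // The SEW and LMUL enums are ordered from weakest to strongest demand,
+    // so taking std::max keeps the stronger of the two requirements.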
+ SEW = std::max(SEW, B.SEW);
+ LMUL = std::max(LMUL, B.LMUL);
+ SEWLMULRatio |= B.SEWLMULRatio;
+ TailPolicy |= B.TailPolicy;
+ MaskPolicy |= B.MaskPolicy;
+ VILL |= B.VILL;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Support for debugging, callable in GDB: V->dump()
+ LLVM_DUMP_METHOD void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+
+ /// Implement operator<<.
+ void print(raw_ostream &OS) const {
+ OS << "{";
+ OS << "VLAny=" << VLAny << ", ";
+ OS << "VLZeroness=" << VLZeroness << ", ";
+ OS << "SEW=";
+ switch (SEW) {
+ case SEWEqual:
+ OS << "SEWEqual";
+ break;
+ case SEWGreaterThanOrEqual:
+ OS << "SEWGreaterThanOrEqual";
+ break;
+ case SEWGreaterThanOrEqualAndLessThan64:
+ OS << "SEWGreaterThanOrEqualAndLessThan64";
+ break;
+ case SEWNone:
+ OS << "SEWNone";
+ break;
+ };
+ OS << ", ";
+ OS << "LMUL=";
+ switch (LMUL) {
+ case LMULEqual:
+ OS << "LMULEqual";
+ break;
+ case LMULLessThanOrEqualToM1:
+ OS << "LMULLessThanOrEqualToM1";
+ break;
+ case LMULNone:
+ OS << "LMULNone";
+ break;
+ };
+ OS << ", ";
+ OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
+ OS << "TailPolicy=" << TailPolicy << ", ";
+ OS << "MaskPolicy=" << MaskPolicy << ", ";
+ OS << "VILL=" << VILL;
+ OS << "}";
+ }
+#endif
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
+ DF.print(OS);
+ return OS;
+}
+#endif
+
+/// Defines the abstract state with which the forward dataflow models the
+/// values of the VL and VTYPE registers after insertion.
+class VSETVLIInfo {
+ struct AVLDef {
+ // Every AVLDef should have a VNInfo, unless we're running without
+ // LiveIntervals in which case this will be nullptr.
+ const VNInfo *ValNo;
+ Register DefReg;
+ };
+ union {
+ AVLDef AVLRegDef;
+ unsigned AVLImm;
+ };
+
+ enum : uint8_t {
+ Uninitialized,
+ AVLIsReg,
+ AVLIsImm,
+ AVLIsVLMAX,
+ Unknown, // AVL and VTYPE are fully unknown
+ } State = Uninitialized;
+
+ // Fields from VTYPE.
+ RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
+ uint8_t SEW = 0;
+ uint8_t TailAgnostic : 1;
+ uint8_t MaskAgnostic : 1;
+ uint8_t SEWLMULRatioOnly : 1;
+
+public:
+ VSETVLIInfo()
+ : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
+ SEWLMULRatioOnly(false) {}
+
+ static VSETVLIInfo getUnknown() {
+ VSETVLIInfo Info;
+ Info.setUnknown();
+ return Info;
+ }
+
+ bool isValid() const { return State != Uninitialized; }
+ void setUnknown() { State = Unknown; }
+ bool isUnknown() const { return State == Unknown; }
+
+ void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
+ assert(AVLReg.isVirtual());
+ AVLRegDef.ValNo = VNInfo;
+ AVLRegDef.DefReg = AVLReg;
+ State = AVLIsReg;
+ }
+
+ void setAVLImm(unsigned Imm) {
+ AVLImm = Imm;
+ State = AVLIsImm;
+ }
+
+ void setAVLVLMAX() { State = AVLIsVLMAX; }
+
+ bool hasAVLImm() const { return State == AVLIsImm; }
+ bool hasAVLReg() const { return State == AVLIsReg; }
+ bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
+ Register getAVLReg() const {
+ assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
+ return AVLRegDef.DefReg;
+ }
+ unsigned getAVLImm() const {
+ assert(hasAVLImm());
+ return AVLImm;
+ }
+ const VNInfo *getAVLVNInfo() const {
+ assert(hasAVLReg());
+ return AVLRegDef.ValNo;
+ }
+ // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
+ // a PHI node. In that case getAVLVNInfo()->def will point to the block
+ // boundary slot and this will return nullptr. If LiveIntervals isn't
+ // available, nullptr is also returned.
+ const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
+ assert(hasAVLReg());
+ if (!LIS || getAVLVNInfo()->isPHIDef())
+ return nullptr;
+ auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
+ assert(MI);
+ return MI;
+ }
+
+ void setAVL(const VSETVLIInfo &Info) {
+ assert(Info.isValid());
+ if (Info.isUnknown())
+ setUnknown();
+ else if (Info.hasAVLReg())
+ setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
+ else if (Info.hasAVLVLMAX())
+ setAVLVLMAX();
+ else {
+ assert(Info.hasAVLImm());
+ setAVLImm(Info.getAVLImm());
+ }
+ }
+
+ unsigned getSEW() const { return SEW; }
+ RISCVVType::VLMUL getVLMUL() const { return VLMul; }
+ bool getTailAgnostic() const { return TailAgnostic; }
+ bool getMaskAgnostic() const { return MaskAgnostic; }
+
+ bool hasNonZeroAVL(const LiveIntervals *LIS) const {
+ if (hasAVLImm())
+ return getAVLImm() > 0;
+ if (hasAVLReg()) {
+ if (auto *DefMI = getAVLDefMI(LIS))
+ return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI);
+ }
+ if (hasAVLVLMAX())
+ return true;
+ return false;
+ }
+
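+  // Two AVLs have the same "zeroness" if both are zero or both are known
+  // non-zero; e.g. (illustrative) AVLImm=5 and AVLVLMAX are equally zero,
+  // while AVLImm=0 and AVLImm=1 are not.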
+ bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
+ const LiveIntervals *LIS) const {
+ if (hasSameAVL(Other))
+ return true;
+ return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
+ }
+
+ bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
+ if (hasAVLReg() && Other.hasAVLReg()) {
+ assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
+ "we either have intervals or we don't");
+ if (!getAVLVNInfo())
+ return getAVLReg() == Other.getAVLReg();
+ return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
+ getAVLReg() == Other.getAVLReg();
+ }
+
+ if (hasAVLImm() && Other.hasAVLImm())
+ return getAVLImm() == Other.getAVLImm();
+
+ if (hasAVLVLMAX())
+ return Other.hasAVLVLMAX() && hasSameVLMAX(Other);
+
+ return false;
+ }
+
+ // Return true if the two lattice values are guaranteed to have
+ // the same AVL value at runtime.
+ bool hasSameAVL(const VSETVLIInfo &Other) const {
+ // Without LiveIntervals, we don't know which instruction defines a
+ // register. Since a register may be redefined, this means all AVLIsReg
+ // states must be treated as possibly distinct.
+ if (hasAVLReg() && Other.hasAVLReg()) {
+ assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
+ "we either have intervals or we don't");
+ if (!getAVLVNInfo())
+ return false;
+ }
+ return hasSameAVLLatticeValue(Other);
+ }
+
+ void setVTYPE(unsigned VType) {
+ assert(isValid() && !isUnknown() &&
+ "Can't set VTYPE for uninitialized or unknown");
+ VLMul = RISCVVType::getVLMUL(VType);
+ SEW = RISCVVType::getSEW(VType);
+ TailAgnostic = RISCVVType::isTailAgnostic(VType);
+ MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+ }
+ void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
+ assert(isValid() && !isUnknown() &&
+ "Can't set VTYPE for uninitialized or unknown");
+ VLMul = L;
+ SEW = S;
+ TailAgnostic = TA;
+ MaskAgnostic = MA;
+ }
+
+ void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
+
+ unsigned encodeVTYPE() const {
+ assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
+ "Can't encode VTYPE for uninitialized or unknown");
+ return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ }
+
+ bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
+
+ bool hasSameVTYPE(const VSETVLIInfo &Other) const {
+ assert(isValid() && Other.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!isUnknown() && !Other.isUnknown() &&
+ "Can't compare VTYPE in unknown state");
+ assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
+ "Can't compare when only LMUL/SEW ratio is valid.");
+ return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+ std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
+ Other.MaskAgnostic);
+ }
+
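+  // The SEW/LMUL ratio is what determines VLMAX for a given VLEN:
+  // VLMAX = VLEN / (SEW / LMUL). E.g. SEW=32 with LMUL=m2 and SEW=16 with
+  // LMUL=m1 both have ratio 16 and thus the same VLMAX.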
+ unsigned getSEWLMULRatio() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return RISCVVType::getSEWLMULRatio(SEW, VLMul);
+ }
+
+ // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
+ // Note that having the same VLMAX ensures that both share the same
+ // function from AVL to VL; that is, they must produce the same VL value
+ // for any given AVL value.
+ bool hasSameVLMAX(const VSETVLIInfo &Other) const {
+ assert(isValid() && Other.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!isUnknown() && !Other.isUnknown() &&
+ "Can't compare VTYPE in unknown state");
+ return getSEWLMULRatio() == Other.getSEWLMULRatio();
+ }
+
+ bool hasCompatibleVTYPE(const DemandedFields &Used,
+ const VSETVLIInfo &Require) const;
+  // Determine whether the vector instruction's requirements represented by
+  // Require are compatible with the previous vsetvli instruction represented
+  // by this. MI is the instruction whose requirements we're considering.
+ bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
+ const LiveIntervals *LIS) const {
+ assert(isValid() && Require.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ // Nothing is compatible with Unknown.
+ if (isUnknown() || Require.isUnknown())
+ return false;
+
+    // If only the SEW/LMUL ratio is valid, then this isn't compatible.
+ if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
+ return false;
+
+ if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
+ return false;
+
+ if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
+ return false;
+
+ return hasCompatibleVTYPE(Used, Require);
+ }
+
+ bool operator==(const VSETVLIInfo &Other) const {
+ // Uninitialized is only equal to another Uninitialized.
+ if (!isValid())
+ return !Other.isValid();
+ if (!Other.isValid())
+ return !isValid();
+
+ // Unknown is only equal to another Unknown.
+ if (isUnknown())
+ return Other.isUnknown();
+ if (Other.isUnknown())
+ return isUnknown();
+
+ if (!hasSameAVLLatticeValue(Other))
+ return false;
+
+ // If the SEWLMULRatioOnly bits are different, then they aren't equal.
+ if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
+ return false;
+
+ // If only the VLMAX is valid, check that it is the same.
+ if (SEWLMULRatioOnly)
+ return hasSameVLMAX(Other);
+
+ // If the full VTYPE is valid, check that it is the same.
+ return hasSameVTYPE(Other);
+ }
+
+ bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }
+
+ // Calculate the VSETVLIInfo visible to a block assuming this and Other are
+ // both predecessors.
+ VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
+ // If the new value isn't valid, ignore it.
+ if (!Other.isValid())
+ return *this;
+
+ // If this value isn't valid, this must be the first predecessor, use it.
+ if (!isValid())
+ return Other;
+
+ // If either is unknown, the result is unknown.
+ if (isUnknown() || Other.isUnknown())
+ return VSETVLIInfo::getUnknown();
+
+    // If we have an exact match, return this.
+ if (*this == Other)
+ return *this;
+
+ // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
+ // return an SEW/LMUL ratio only value.
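+    // For example (illustrative): {AVLImm=4, SEW=32, LMUL=m1} and
+    // {AVLImm=4, SEW=16, LMUL=mf2} share the AVL and the ratio (32), so the
+    // merged state keeps only the SEW/LMUL ratio and drops the exact VTYPE.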
+ if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
+ VSETVLIInfo MergeInfo = *this;
+ MergeInfo.SEWLMULRatioOnly = true;
+ return MergeInfo;
+ }
+
+ // Otherwise the result is unknown.
+ return VSETVLIInfo::getUnknown();
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Support for debugging, callable in GDB: V->dump()
+ LLVM_DUMP_METHOD void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+
+ /// Implement operator<<.
+ void print(raw_ostream &OS) const {
+ OS << "{";
+ if (!isValid())
+ OS << "Uninitialized";
+ if (isUnknown())
+ OS << "unknown";
+ if (hasAVLReg())
+ OS << "AVLReg=" << llvm::printReg(getAVLReg());
+ if (hasAVLImm())
+ OS << "AVLImm=" << (unsigned)AVLImm;
+ if (hasAVLVLMAX())
+ OS << "AVLVLMAX";
+ OS << ", ";
+
+    auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
+
+ OS << "VLMul=";
+ if (Fractional)
+ OS << "mf";
+ else
+ OS << "m";
+ OS << LMul << ", "
+ << "SEW=e" << (unsigned)SEW << ", "
+ << "TailAgnostic=" << (bool)TailAgnostic << ", "
+ << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
+ << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
+ }
+#endif
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
+ V.print(OS);
+ return OS;
+}
+#endif
+
+struct BlockData {
+ // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
+ // block. Calculated in Phase 2.
+ VSETVLIInfo Exit;
+
+ // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
+ // blocks. Calculated in Phase 2, and used by Phase 3.
+ VSETVLIInfo Pred;
+
+ // Keeps track of whether the block is already in the queue.
+ bool InQueue = false;
+
+ BlockData() = default;
+};
+
+class RISCVVConfigInfo {
+ bool HaveVectorOp = false;
+  const RISCVSubtarget *ST = nullptr;
+  // Possibly null!
+  LiveIntervals *LIS = nullptr;
+ std::queue<const MachineBasicBlock *> WorkList;
+ std::vector<BlockData> BlockInfo;
+
+public:
+ /// Return the fields and properties demanded by the provided instruction.
+ static DemandedFields getDemanded(const MachineInstr &MI,
+ const RISCVSubtarget *ST);
+
+ /// Return true if moving from CurVType to NewVType is
+ /// indistinguishable from the perspective of an instruction (or set
+  /// of instructions) which use only the Used subfields and properties.
+  static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
+ const DemandedFields &Used);
+ // Return a VSETVLIInfo representing the changes made by this VSETVLI or
+ // VSETIVLI instruction.
+ VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
+
+  // Return true if we can mutate PrevMI to match MI without changing any of
+  // the fields which would be observed.
+ bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
+ const DemandedFields &Used) const;
+  RISCVVConfigInfo() = default;
+ RISCVVConfigInfo(const RISCVSubtarget *ST, LiveIntervals *LIS)
+ : ST(ST), LIS(LIS) {}
+ const std::vector<BlockData> &getInfo() const { return BlockInfo; }
+ std::vector<BlockData> &getInfo() { return BlockInfo; }
+  bool haveVectorOp() const;
+ void compute(const MachineFunction &MF);
+ void clear();
+ // Given an incoming state reaching MI, minimally modifies that state so that
+ // it is compatible with MI. The resulting state is guaranteed to be
+ // semantically legal for MI, but may not be the state requested by MI.
+ void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
+  // Given a state with which we evaluated MI (see transferBefore above for
+  // why this might be different from the state MI requested), modify the
+  // state to reflect the changes MI might make.
+ void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
+
+private:
+ static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
+ RISCVVType::VLMUL VLMul);
+ // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+ // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+ // places.
+ static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
+ const VSETVLIInfo &NewInfo,
+ DemandedFields &Demanded);
+ /// Return true if a VSETVLI is required to transition from CurInfo to Require
+ /// given a set of DemandedFields \p Used.
+ bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) const;
+ void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
+ VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
+ bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const;
+ // If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
+ // replace the AVL operand with the AVL of the defining vsetvli. E.g.
+ //
+ // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
+ // $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
+ // ->
+ // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
+ // $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
+ void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
+};
+
+class RISCVVConfigAnalysis : public AnalysisInfoMixin<RISCVVConfigAnalysis> {
+ friend AnalysisInfoMixin<RISCVVConfigAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = RISCVVConfigInfo;
+ Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
+class RISCVVConfigWrapperPass : public MachineFunctionPass {
+ RISCVVConfigInfo Result;
+
+public:
+ static char ID;
+
+ RISCVVConfigWrapperPass();
+
+ void getAnalysisUsage(AnalysisUsage &) const override;
+ bool runOnMachineFunction(MachineFunction &) override;
+ void releaseMemory() override { Result.clear(); }
+ RISCVVConfigInfo &getResult() { return Result; }
+};
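+
+// A minimal usage sketch for the legacy pass manager (illustrative;
+// "MyRISCVPass" is a hypothetical client, not part of this patch):
+//
+//   void MyRISCVPass::getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addRequired<RISCVVConfigWrapperPass>();
+//     MachineFunctionPass::getAnalysisUsage(AU);
+//   }
+//
+//   bool MyRISCVPass::runOnMachineFunction(MachineFunction &MF) {
+//     RISCVVConfigInfo &VCI =
+//         getAnalysis<RISCVVConfigWrapperPass>().getResult();
+//     if (!VCI.haveVectorOp())
+//       return false;
+//     const std::vector<BlockData> &Blocks = VCI.getInfo();
+//     // Blocks[MBB.getNumber()].Pred / .Exit give the VL/VTYPE state at
+//     // block entry and exit.
+//     return false;
+//   }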
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_RISCVVCONFIGANALYSIS_H