[llvm] [RISCV] Add XSfmm pseudo instruction and vset* insertion support (PR #143068)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 20:56:35 PDT 2025
4vtomat updated https://github.com/llvm/llvm-project/pull/143068
From 7802640092ba413559b4dc170454c59f1be2da41 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 5 Jun 2025 02:51:08 -0700
Subject: [PATCH 1/3] [RISCV] Add XSfmm pseudo instruction and vset* insertion
support
This patch supports naive vset* insertion. If the state (tm, tn, tk,
sew, widen) changes, it emits all of the vset* instructions that are
needed; partial compatibility is not supported yet.
Co-authored-by: Piyou Chen <piyou.chen at sifive.com>
---
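As an illustration of the state tracking this relies on, here is a minimal
C++ sketch (the struct and its names are illustrative only; the actual
implementation extends VSETVLIInfo in RISCVInsertVSETVLI.cpp below):

    // Hedged sketch: the XSfmm configuration state compared between
    // consecutive instructions. Any mismatch causes the full
    // vsettnt/vsettm/vsettk sequence to be re-emitted; partial reuse of a
    // compatible previous state is not attempted yet.
    struct XSfmmState {
      unsigned TM = 0, TN = 0, TK = 0; // tile shape
      unsigned SEW = 0;                // element width
      unsigned TWiden = 0;             // widen factor (0 = not an XSfmm state)
      bool AltFmt = false;             // alternate (BF16) format
      bool operator==(const XSfmmState &O) const {
        return TM == O.TM && TN == O.TN && TK == O.TK && SEW == O.SEW &&
               TWiden == O.TWiden && AltFmt == O.AltFmt;
      }
    };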
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 4 +
.../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 66 ++-
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 210 ++++++-
llvm/lib/Target/RISCV/RISCVInstrFormats.td | 19 +
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 +
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 22 +-
llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 9 +-
llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td | 138 +++++
llvm/lib/Target/RISCV/RISCVInstrPredicates.td | 33 +-
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 4 +
.../RISCV/rvv/sifive-xsfmm-vset-insert.mir | 523 ++++++++++++++++++
11 files changed, 1008 insertions(+), 23 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 1f434beca5388..8a18221832ecb 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1616,6 +1616,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"operand must be a valid system register "
"name or an integer in the range");
}
+ case Match_InvalidXSfmmVType: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return generateXSfmmVTypeError(ErrorLoc);
+ }
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return generateVTypeError(ErrorLoc);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 6ef94fb5e93da..e470d51c6c5fa 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -138,6 +138,25 @@ enum {
// 3 -> SEW * 4
DestEEWShift = ElementsDependOnMaskShift + 1,
DestEEWMask = 3ULL << DestEEWShift,
+
+ // 0 -> Don't care about altfmt bit in VTYPE.
+ // 1 -> Is not altfmt.
+  // 2 -> Is altfmt (BF16).
+ AltFmtTypeShift = DestEEWShift + 2,
+ AltFmtTypeMask = 3ULL << AltFmtTypeShift,
+
+ IsWidenShift = AltFmtTypeShift + 2,
+ IsWidenMask = 1ULL << IsWidenShift,
+
+ // XSfmmbase
+ HasTWidenOpShift = IsWidenShift + 1,
+ HasTWidenOpMask = 1ULL << HasTWidenOpShift,
+
+ HasTMOpShift = HasTWidenOpShift + 1,
+ HasTMOpMask = 1ULL << HasTMOpShift,
+
+ HasTKOpShift = HasTMOpShift + 1,
+ HasTKOpMask = 1ULL << HasTKOpShift,
};
// Helper functions to read TSFlags.
@@ -179,6 +198,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) {
return TSFlags & HasRoundModeOpMask;
}
+enum class AltFmtType { DontCare, NotAltFmt, AltFmt };
+static inline AltFmtType getAltFmtType(uint64_t TSFlags) {
+ return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift);
+}
+
/// \returns true if this instruction uses vxrm
static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; }
@@ -194,11 +218,47 @@ static inline bool elementsDependOnMask(uint64_t TSFlags) {
return TSFlags & ElementsDependOnMaskMask;
}
+// XSfmmbase
+static inline bool hasTWidenOp(uint64_t TSFlags) {
+ return TSFlags & HasTWidenOpMask;
+}
+
+static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; }
+
+static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; }
+
+static inline unsigned getTNOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags));
+ unsigned Offset = 3;
+ if (hasTKOp(TSFlags))
+ Offset = 4;
+ return Desc.getNumOperands() - Offset;
+}
+
+static inline unsigned getTMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags));
+ if (hasTKOp(TSFlags))
+ return Desc.getNumOperands() - 5;
+ // vtzero.t
+ return Desc.getNumOperands() - 4;
+}
+
+static inline unsigned getTKOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags));
+ return Desc.getNumOperands() - 3;
+}
+
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
// instructions with VL also have SEW.
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
+ // In Xsfmmbase, TN is alias for VL, so here we use the same TSFlags bit.
+ if (hasTWidenOp(TSFlags))
+ return getTNOpNum(Desc);
unsigned Offset = 2;
if (hasVecPolicyOp(TSFlags))
Offset = 3;
@@ -216,7 +276,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
assert(hasSEWOp(TSFlags));
unsigned Offset = 1;
- if (hasVecPolicyOp(TSFlags))
+ if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags))
Offset = 2;
return Desc.getNumOperands() - Offset;
}
@@ -233,6 +293,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) {
if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags))
return -1;
+ if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags))
+ return getTMOpNum(Desc) - 1;
+
// The operand order
// --------------------------------------
// | n-1 (if any) | n-2 | n-3 | n-4 |
@@ -375,6 +438,7 @@ enum OperandType : unsigned {
// instructions to represent a value that be passed as AVL to either vsetvli
// or vsetivli.
OPERAND_AVL,
+ OPERAND_XSFMM_VTYPE,
};
} // namespace RISCVOp
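
The operand-number helpers above encode a fixed tail layout for XSfmm
pseudos: (..., $atm, $atn, $atk, $sew, $twiden), where $atm/$atk exist only
on the matmul forms and $atn doubles as VL. A hedged sketch of how a caller
would recover these operands (the printXSfmmShape helper is hypothetical,
not part of this patch):

    // For an 8-operand matmul pseudo (rd, vs2, vs1, atm, atn, atk, sew,
    // twiden): getTMOpNum -> 3, getTNOpNum -> 4, getTKOpNum -> 5,
    // getSEWOpNum -> 6, and twiden is always the last explicit operand.
    static void printXSfmmShape(const MachineInstr &MI) {
      const MCInstrDesc &Desc = MI.getDesc();
      uint64_t TSFlags = Desc.TSFlags;
      assert(RISCVII::hasTWidenOp(TSFlags) && "not an XSfmm pseudo");
      unsigned TWiden = MI.getOperand(Desc.getNumOperands() - 1).getImm();
      unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
      dbgs() << "sew=" << (1u << Log2SEW) << " twiden=" << TWiden;
      if (RISCVII::hasVLOp(TSFlags)) // TN is the VL operand on XSfmm pseudos.
        dbgs() << " tn=" << MI.getOperand(RISCVII::getTNOpNum(Desc));
      if (RISCVII::hasTMOp(TSFlags))
        dbgs() << " tm=" << MI.getOperand(RISCVII::getTMOpNum(Desc));
      if (RISCVII::hasTKOp(TSFlags))
        dbgs() << " tk=" << MI.getOperand(RISCVII::getTKOpNum(Desc));
      dbgs() << "\n";
    }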
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e1c69c69a99ca..88d1178eba9b1 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -164,10 +164,13 @@ struct DemandedFields {
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
// vill is unset.
bool VILL = false;
+ bool UseTWiden = false;
+ bool UseAltFmt = false;
// Return true if any part of VTYPE was used
bool usedVTYPE() const {
- return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
+ return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL ||
+ UseTWiden || UseAltFmt;
}
// Return true if any property of VL was used
@@ -183,6 +186,8 @@ struct DemandedFields {
TailPolicy = true;
MaskPolicy = true;
VILL = true;
+ UseTWiden = true;
+ UseAltFmt = true;
}
// Mark all VL properties as demanded
@@ -208,6 +213,8 @@ struct DemandedFields {
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
VILL |= B.VILL;
+ UseAltFmt |= B.UseAltFmt;
+ UseTWiden |= B.UseTWiden;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -255,6 +262,8 @@ struct DemandedFields {
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy << ", ";
OS << "VILL=" << VILL;
+  OS << ", UseAltFmt=" << UseAltFmt;
+  OS << ", UseTWiden=" << UseTWiden;
OS << "}";
}
#endif
@@ -324,6 +333,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
RISCVVType::isMaskAgnostic(NewVType))
return false;
+ if (Used.UseTWiden && (RISCVVType::hasXSfmmWiden(CurVType) !=
+ RISCVVType::hasXSfmmWiden(NewVType) ||
+ (RISCVVType::hasXSfmmWiden(CurVType) &&
+ RISCVVType::getXSfmmWiden(CurVType) !=
+ RISCVVType::getXSfmmWiden(NewVType))))
+ return false;
+ if (Used.UseAltFmt &&
+ RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType))
+ return false;
return true;
}
@@ -472,6 +490,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.TailPolicy = false;
}
+ Res.UseAltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) !=
+ RISCVII::AltFmtType::DontCare;
+ Res.UseTWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI);
+
return Res;
}
@@ -503,6 +526,8 @@ class VSETVLIInfo {
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t SEWLMULRatioOnly : 1;
+ uint8_t AltFmt : 1;
+ uint8_t TWiden = 0;
public:
VSETVLIInfo()
@@ -579,6 +604,8 @@ class VSETVLIInfo {
RISCVVType::VLMUL getVLMUL() const { return VLMul; }
bool getTailAgnostic() const { return TailAgnostic; }
bool getMaskAgnostic() const { return MaskAgnostic; }
+ bool getAltFmt() const { return AltFmt; }
+ unsigned getTWiden() const { return TWiden; }
bool hasNonZeroAVL(const LiveIntervals *LIS) const {
if (hasAVLImm())
@@ -640,21 +667,31 @@ class VSETVLIInfo {
SEW = RISCVVType::getSEW(VType);
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+ AltFmt = RISCVVType::isAltFmt(VType);
+ TWiden =
+ RISCVVType::hasXSfmmWiden(VType) ? RISCVVType::getXSfmmWiden(VType) : 0;
}
- void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
+ void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt,
+ unsigned W) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
SEW = S;
TailAgnostic = TA;
MaskAgnostic = MA;
+ AltFmt = Altfmt;
+ TWiden = W;
}
+ void setAltFmt(bool AF) { AltFmt = AF; }
+
void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
+ if (TWiden != 0)
+ return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
}
@@ -667,9 +704,9 @@ class VSETVLIInfo {
"Can't compare VTYPE in unknown state");
assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
"Can't compare when only LMUL/SEW ratio is valid.");
- return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+ return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) ==
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
- Other.MaskAgnostic);
+ Other.MaskAgnostic, Other.AltFmt, Other.TWiden);
}
unsigned getSEWLMULRatio() const {
@@ -819,6 +856,8 @@ class VSETVLIInfo {
<< "TailAgnostic=" << (bool)TailAgnostic << ", "
<< "MaskAgnostic=" << (bool)MaskAgnostic << ", "
<< "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
+ OS << "TWiden=" << (bool)TWiden << ", ";
+ OS << "AltFmt=" << (bool)AltFmt << "}";
}
#endif
};
@@ -846,6 +885,11 @@ struct BlockData {
BlockData() = default;
};
+enum TKTMMode {
+ VSETTK = 0,
+ VSETTM = 1,
+};
+
class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
@@ -901,6 +945,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
+ bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const;
};
} // end anonymous namespace
@@ -938,6 +983,16 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else if (RISCVInstrInfo::isXSfmmVectorTNConfigInstr(MI)) {
+ Register ATReg = MI.getOperand(1).getReg();
+ switch (MI.getOpcode()) {
+ case RISCV::PseudoSF_VSETTNTX0:
+ NewInfo.setAVLVLMAX();
+ break;
+ case RISCV::PseudoSF_VSETTNT:
+ NewInfo.setAVLRegDef(getVNInfoFromReg(ATReg, MI, LIS), ATReg);
+ break;
+ }
} else {
assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLIX0);
@@ -998,11 +1053,36 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+ bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt;
+ InstrInfo.setAltFmt(AltFmt);
+
unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
// A Log2SEW of 0 is an operation on mask registers only.
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+ if (RISCVII::hasTWidenOp(TSFlags)) {
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+ const MachineOperand &TWidenOp =
+ MI.getOperand(MI.getNumExplicitOperands() - 1);
+ unsigned TWiden = TWidenOp.getImm();
+
+ InstrInfo.setAVLVLMAX();
+ if (RISCVII::hasVLOp(TSFlags)) {
+ const MachineOperand &TnOp =
+ MI.getOperand(RISCVII::getTNOpNum(MI.getDesc()));
+
+ if (TnOp.getReg().isVirtual())
+ InstrInfo.setAVLRegDef(getVNInfoFromReg(TnOp.getReg(), MI, LIS),
+ TnOp.getReg());
+ }
+
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden);
+
+ return InstrInfo;
+ }
+
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isImm()) {
@@ -1038,7 +1118,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
}
#endif
- InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ // TODO: Propagate the twiden from previous vtype for potential reuse.
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt,
+ /*TWiden*/ 0);
forwardVSETVLIAVL(InstrInfo);
@@ -1048,8 +1130,30 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
-
++NumInsertedVSETVL;
+
+ if (Info.getTWiden()) {
+ if (Info.hasAVLVLMAX()) {
+ Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0))
+ .addReg(DestReg, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE());
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->createAndComputeVirtRegInterval(DestReg);
+ }
+ } else {
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(Info.getAVLReg())
+ .addImm(Info.encodeVTYPE());
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MI);
+ }
+ return;
+ }
+
if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
@@ -1190,7 +1294,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
// be coalesced into another vsetvli since we won't demand any fields.
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
NewInfo.setAVLImm(1);
- NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
+ NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true,
+ /*AltFmt*/ false, /*W*/ 0);
Info = NewInfo;
return;
}
@@ -1232,7 +1337,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
(Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
IncomingInfo.getTailAgnostic(),
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
- IncomingInfo.getMaskAgnostic());
+ IncomingInfo.getMaskAgnostic(),
+ (Demanded.UseAltFmt ? IncomingInfo : Info).getAltFmt(),
+ Demanded.UseTWiden ? IncomingInfo.getTWiden() : 0);
// If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
// the AVL.
@@ -1285,7 +1392,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
- isVectorCopy(ST->getRegisterInfo(), MI))
+ isVectorCopy(ST->getRegisterInfo(), MI) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
HadVectorOp = true;
transferAfter(Info, MI);
@@ -1667,6 +1775,10 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
};
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+ // TODO: Support XSfmm.
+ if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
+ continue;
if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
Used.doUnion(getDemanded(MI, ST));
@@ -1774,6 +1886,80 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
}
}
+static void shrinkIntervalAndRemoveDeadMI(MachineOperand &MO,
+ LiveIntervals *LIS) {
+ Register Reg = MO.getReg();
+ MO.setReg(RISCV::NoRegister);
+ MO.setIsKill(false);
+
+ if (!LIS)
+ return;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+
+ // Erase the AVL operand from the instruction.
+ SmallVector<MachineInstr *> DeadMIs;
+ LIS->shrinkToUses(&LI, &DeadMIs);
+ // TODO: Enable this once needVSETVLIPHI is supported.
+ // SmallVector<LiveInterval *> SplitLIs;
+ // LIS->splitSeparateComponents(LI, SplitLIs);
+
+ for (MachineInstr *DeadMI : DeadMIs) {
+ LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ DeadMI->eraseFromParent();
+ }
+}
+
+bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
+ TKTMMode Mode) const {
+
+ bool Changed = false;
+ for (auto &MI : MBB) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) ||
+ !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags))
+ continue;
+
+ VSETVLIInfo CurrInfo = computeInfoForInstr(MI);
+
+ if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags))
+ continue;
+
+ if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags))
+ continue;
+
+ unsigned OpNum = 0;
+ unsigned Opcode = 0;
+ switch (Mode) {
+ case VSETTK:
+ OpNum = RISCVII::getTKOpNum(MI.getDesc());
+ Opcode = RISCV::PseudoSF_VSETTK;
+ break;
+ case VSETTM:
+ OpNum = RISCVII::getTMOpNum(MI.getDesc());
+ Opcode = RISCV::PseudoSF_VSETTM;
+ break;
+ }
+
+ assert(OpNum && Opcode && "Invalid OpNum or Opcode");
+
+ const MachineOperand &Op = MI.getOperand(OpNum);
+
+ auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(Op.getReg())
+ .addImm(Log2_32(CurrInfo.getSEW()))
+ .addImm((CurrInfo.getTWiden() >> 1) + 1);
+
+ Changed = true;
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*TmpMI);
+
+ shrinkIntervalAndRemoveDeadMI(MI.getOperand(OpNum), LIS);
+ }
+ return Changed;
+}
+
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
@@ -1851,6 +2037,12 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
insertReadVL(MBB);
+ for (MachineBasicBlock &MBB : MF)
+ insertVSETMTK(MBB, VSETTM);
+
+ for (MachineBasicBlock &MBB : MF)
+ insertVSETMTK(MBB, VSETTK);
+
BlockInfo.clear();
return HaveVectorOp;
}
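
One detail worth calling out from insertVSETMTK above: the twiden immediate
placed on PseudoSF_VSETTM/PseudoSF_VSETTK is (TWiden >> 1) + 1, i.e. a
compressed form of the widen factor. A sketch of the mapping (the helper
name is illustrative):

    // Widen factor -> immediate, as computed in insertVSETMTK:
    //   1 -> 1, 2 -> 2, 4 -> 3
    // e.g. the xsfmm_different_state test below uses widen 4 and checks for
    // "PseudoSF_VSETTK [[COPY]], 4, 3".
    static unsigned encodeTWidenImm(unsigned TWiden) {
      assert((TWiden == 1 || TWiden == 2 || TWiden == 4) && "bad widen");
      return (TWiden >> 1) + 1;
    }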
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 088a6923fadb1..81105b7815838 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -257,6 +257,25 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
// Indicates the EEW of a vector instruction's destination operand.
EEW DestEEW = EEWSEWx1;
let TSFlags{25-24} = DestEEW.Value;
+
+ // 0 -> Don't care about altfmt bit in VTYPE.
+ // 1 -> Is not altfmt.
+  // 2 -> Is altfmt (BF16).
+ bits<2> AltFmtType = 0;
+ let TSFlags{27-26} = AltFmtType;
+
+ bit IsWiden = 0;
+ let TSFlags{28} = IsWiden;
+
+ // XSfmmbase
+ bit HasTWidenOp = 0;
+ let TSFlags{29} = HasTWidenOp;
+
+ bit HasTmOp = 0;
+ let TSFlags{30} = HasTmOp;
+
+ bit HasTkOp = 0;
+ let TSFlags{31} = HasTkOp;
}
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 86a4e8e370ee6..5c6270da592ec 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2843,6 +2843,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
else
Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
break;
+ case RISCVOp::OPERAND_XSFMM_VTYPE:
+ Ok = RISCVVType::isValidXSfmmVType(Imm);
+ break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 281f8d55932b9..a9e7c86b33ddd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -128,6 +128,9 @@ defvar TAIL_AGNOSTIC = 1;
defvar TU_MU = 0;
defvar TA_MU = 1;
defvar TA_MA = 3;
+defvar DONT_CARE_ALTFMT = 0;
+defvar IS_NOT_ALTFMT = 1;
+defvar IS_ALTFMT = 2;
//===----------------------------------------------------------------------===//
// Utilities.
@@ -159,7 +162,8 @@ class PseudoToVInst<string PseudoInst> {
["_M4", ""],
["_M8", ""],
["_SE", ""],
- ["_RM", ""]
+ ["_RM", ""],
+ ["_ALT", ""]
];
string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst,
!subst(AffixSubst[0], AffixSubst[1], Acc));
@@ -6436,7 +6440,7 @@ let Defs = [VXSAT] in {
// 13. Vector Floating-Point Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -6609,7 +6613,7 @@ defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 14. Vector Reduction Operations
@@ -6637,7 +6641,7 @@ defm PseudoVWREDSUM : VPseudoVWRED_VS;
}
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
//===----------------------------------------------------------------------===//
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -6656,7 +6660,7 @@ defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM;
defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM;
}
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 15. Vector Mask Instructions
@@ -6749,7 +6753,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// 16.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach f = FPList in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
@@ -6768,7 +6772,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
RISCVVPseudo;
}
}
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 16.3. Vector Slide Instructions
@@ -6780,10 +6784,10 @@ let Predicates = [HasVInstructions] in {
defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX;
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">;
defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF;
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 16.4. Vector Register Gather Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index affe3a8476753..c715373386d50 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -419,8 +419,10 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
- defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ let AltFmtType = IS_NOT_ALTFMT in {
+ defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
+ defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ }
}
}
foreach m = MxListW in {
@@ -430,7 +432,8 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach f = FPListW in {
foreach m = f.MxList in
- defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
+ let AltFmtType = IS_NOT_ALTFMT in
+ defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
index 66cb2d53da960..750cd89fb5eb8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
@@ -272,3 +272,141 @@ let Uses = [FRM], mayRaiseFPException = true in {
} // Predicates = [HasVendorXSfmm32a8f]
} // DecoderNamespace = "XSfvector"
+
+class VPseudoSF_VTileLoad
+ : Pseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden), []>,
+ RISCVVPseudo {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileStore
+ : Pseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileMove_V_T
+ : Pseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileMove_T_V
+ : Pseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_MatMul<RegisterClass mtd_class>
+ : Pseudo<(outs),
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn,
+ AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasTmOp = 1;
+ let HasVLOp = 1; // Tn
+ let HasTkOp = 1;
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_MatMul_FRM<RegisterClass mtd_class>
+ : Pseudo<(outs),
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm,
+ AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew,
+ ixlenimm:$twiden), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasTmOp = 1;
+ let HasVLOp = 1; // Tn
+ let HasTkOp = 1;
+ let HasSEWOp = 1;
+ let HasRoundModeOp = 1;
+ let hasPostISelHook = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let Defs = [VL, VTYPE] in {
+ def PseudoSF_VSETTNT
+ : Pseudo<(outs GPR:$rd),
+ (ins GPRNoX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTNTX0
+ : Pseudo<(outs GPR:$rd),
+ (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+}
+
+let Defs = [VTYPE], Uses = [VTYPE], HasTWidenOp = 1, HasSEWOp = 1 in {
+ def PseudoSF_VSETTM
+ : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>,
+ PseudoInstExpansion<(SF_VSETTM GPR:$rd, GPR:$rs1)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTK
+ : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, ixlenimm:$logwsew, ixlenimm:$twiden), []>,
+ PseudoInstExpansion<(SF_VSETTK GPR:$rd, GPR:$rs1)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+}
+}
+
+foreach eew = [8, 16, 32, 64] in {
+ def PseudoSF_VLTE # eew : VPseudoSF_VTileLoad;
+ def PseudoSF_VSTE # eew : VPseudoSF_VTileStore;
+}
+
+def PseudoSF_VTMV_T_V : VPseudoSF_VTileMove_T_V;
+def PseudoSF_VTMV_V_T : VPseudoSF_VTileMove_V_T;
+
+foreach a = I8Encodes in
+ foreach b = I8Encodes in
+ def PseudoSF_MM_ # !toupper(a.Name) # _ # !toupper(b.Name)
+ : VPseudoSF_MatMul<TRM4>;
+
+let AltFmtType = IS_NOT_ALTFMT in
+ def PseudoSF_MM_F_F : VPseudoSF_MatMul_FRM<TRM2>;
+let AltFmtType = IS_ALTFMT in
+ def PseudoSF_MM_F_F_ALT : VPseudoSF_MatMul_FRM<TRM2>;
+
+foreach e1 = [5, 4] in
+ foreach e2 = [5, 4] in
+ def PseudoSF_MM_E # e1 # M # !sub(7, e1) # _E # e2 # M # !sub(7, e2)
+ : VPseudoSF_MatMul_FRM<TRM4>;
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+ let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in
+ def PseudoSF_VTZERO_T
+ : Pseudo<(outs), (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew, ixlenimm:$twiden), []>, RISCVVPseudo;
+ def PseudoSF_VTDISCARD : Pseudo<(outs), (ins), []>, RISCVVPseudo;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 4c37cb7e393bf..1a5c6167ee80a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -71,7 +71,38 @@ def isVectorConfigInstr
PseudoVSETVLI,
PseudoVSETVLIX0,
PseudoVSETVLIX0X0,
- PseudoVSETIVLI
+ PseudoVSETIVLI,
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0
+ ]>>>;
+
+// Returns true if this is a PseudoSF_VSETTNT* instruction.
+def isXSfmmVectorTNConfigInstr
+ : TIIPredicate<"isXSfmmVectorTNConfigInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0
+ ]>>>;
+
+// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK.
+def isXSfmmVectorConfigTMTKInstr
+ : TIIPredicate<"isXSfmmVectorConfigTMTKInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTM,
+ PseudoSF_VSETTK
+ ]>>>;
+
+// Returns true if this is an XSfmm vector configuration instruction.
+def isXSfmmVectorConfigInstr
+ : TIIPredicate<"isXSfmmVectorConfigInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTM,
+ PseudoSF_VSETTK
]>>>;
// Return true if this is 'vsetvli x0, x0, vtype' which preserves
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 112142e1ef2f2..6888f74da244f 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -165,6 +165,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Shadow stack pointer.
markSuperRegs(Reserved, RISCV::SSP);
+ // XSfmmbase
+ for (MCPhysReg Reg = RISCV::T0; Reg <= RISCV::T15; Reg++)
+ markSuperRegs(Reserved, Reg);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir
new file mode 100644
index 0000000000000..9b0b0c41581bc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir
@@ -0,0 +1,523 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v \
+# RUN: -run-pass=phi-node-elimination,register-coalescer,riscv-insert-vsetvli | FileCheck %s
+
+--- |
+ define void @xsfmm_same_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 noundef %tm, i64 noundef %tn, i64 noundef %tk) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ ret void
+ }
+
+ define void @xsfmm_different_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 4)
+ ret void
+ }
+
+ define void @xsfmm_different_state_bf(<vscale x 32 x half> %tile1, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64 2, <vscale x 32 x bfloat> %tile2, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ ret void
+ }
+
+ define <vscale x 64 x i8> @interleave_rvv_and_xsfmm(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) {
+ entry:
+ %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl)
+ %1 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl)
+ call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl)
+ ret <vscale x 64 x i8> %1
+ }
+
+ define <vscale x 64 x i8> @interleave_rvv_and_xsfmm2(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) {
+ entry:
+ %0 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %tile, i64 %vl)
+ %1 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl)
+ %2 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl)
+ call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl)
+ ret <vscale x 64 x i8> %2
+ }
+
+ define void @consecutive_xsfmm(<vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, ptr %base) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 0, <vscale x 32 x half> %tile, <vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ call void @llvm.riscv.sf.vste16.i64(i64 0, ptr %base, i64 %tn)
+ ret void
+ }
+
+ define i64 @vsettnt_max(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2)
+ %1 = call i64 @llvm.riscv.sf.vsettnt_max.i64(i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define i64 @single_vsettm(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define i64 @single_vsettn(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettn.i64(i64 %vl, i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define i64 @single_vsettk(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettk.i64(i64 %vl, i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define void @sf_vtzero(i64 %tm, i64 %tn) {
+ entry:
+ call void @llvm.riscv.sf.vtzero.i64(i64 1, i64 %tm, i64 %tn, i64 3, i64 4)
+ ret void
+ }
+
+ declare void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64, <vscale x 32 x half>, <vscale x 32 x half>, i64, i64, i64, i64)
+ declare void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i64, i64, i64, i64)
+ declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64, i64)
+ declare <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, i64)
+ declare void @llvm.riscv.sf.vste16.i64(i64, ptr, i64)
+ declare i64 @llvm.riscv.sf.vsettnt_max.i64(i64, i64)
+ declare i64 @llvm.riscv.sf.vsettm.i64(i64, i64, i64)
+ declare i64 @llvm.riscv.sf.vsettn.i64(i64, i64, i64)
+ declare i64 @llvm.riscv.sf.vsettk.i64(i64, i64, i64)
+ declare void @llvm.riscv.sf.vtzero.i64(i64, i64, i64, i64, i64)
+...
+---
+name: xsfmm_same_state
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: vrm8 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$v8m8', virtual-reg: '%1' }
+ - { reg: '$x10', virtual-reg: '%2' }
+ - { reg: '$x11', virtual-reg: '%3' }
+ - { reg: '$x12', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-LABEL: name: xsfmm_same_state
+ ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %4:gprnox0 = COPY $x12
+ %3:gprnox0 = COPY $x11
+ %2:gprnox0 = COPY $x10
+ %1:vrm8 = COPY $v16m8
+ %0:vrm8 = COPY $v8m8
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoRET
+...
+---
+name: xsfmm_different_state
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: vrm8 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$v8m8', virtual-reg: '%1' }
+ - { reg: '$x10', virtual-reg: '%2' }
+ - { reg: '$x11', virtual-reg: '%3' }
+ - { reg: '$x12', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-LABEL: name: xsfmm_different_state
+ ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1544, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 3, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 3, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 4, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %4:gprnox0 = COPY $x12
+ %3:gprnox0 = COPY $x11
+ %2:gprnox0 = COPY $x10
+ %1:vrm8 = COPY $v16m8
+ %0:vrm8 = COPY $v8m8
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 4, implicit $frm
+ PseudoRET
+...
+---
+name: xsfmm_different_state_bf
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: vrm8 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$v8m8', virtual-reg: '%1' }
+ - { reg: '$x10', virtual-reg: '%2' }
+ - { reg: '$x11', virtual-reg: '%3' }
+ - { reg: '$x12', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-LABEL: name: xsfmm_different_state_bf
+ ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1288, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F_ALT $t2, [[COPY3]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %4:gprnox0 = COPY $x12
+ %3:gprnox0 = COPY $x11
+ %2:gprnox0 = COPY $x10
+ %1:vrm8 = COPY $v16m8
+ %0:vrm8 = COPY $v8m8
+ PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F_ALT $t2, %1:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoRET
+...
+---
+name: interleave_rvv_and_xsfmm
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: gprnox0 }
+ - { id: 2, class: gpr }
+ - { id: 3, class: gpr }
+ - { id: 4, class: vrm8 }
+ - { id: 5, class: vrm8 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$x10', virtual-reg: '%1' }
+ - { reg: '$x11', virtual-reg: '%2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $x10, $x11
+ ; CHECK-LABEL: name: interleave_rvv_and_xsfmm
+ ; CHECK: liveins: $v8m8, $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[PseudoSF_VTMV_V_T]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_]], implicit $vtype
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %2:gpr = COPY $x11
+ %1:gprnox0 = COPY $x10
+ %0:vrm8 = COPY $v8m8
+ %3:gpr = ADDI $x0, 1
+ %4:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1
+ %5:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0
+ PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1
+ $v8m8 = COPY %5:vrm8
+ PseudoRET implicit $v8m8
+...
+---
+name: interleave_rvv_and_xsfmm2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: gprnox0 }
+ - { id: 2, class: gpr }
+ - { id: 3, class: gpr }
+ - { id: 4, class: vrm8 }
+ - { id: 5, class: vrm8 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$x10', virtual-reg: '%1' }
+ - { reg: '$x11', virtual-reg: '%2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $x10, $x11
+ ; CHECK-LABEL: name: interleave_rvv_and_xsfmm2
+ ; CHECK: liveins: $v8m8, $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[COPY2]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M8_1:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[PseudoVADD_VV_M8_]], [[PseudoVADD_VV_M8_]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_1]], implicit $vtype
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %2:gpr = COPY $x11
+ %1:gprnox0 = COPY $x10
+ %0:vrm8 = COPY $v8m8
+ %3:gpr = ADDI $x0, 1
+ %4:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %0:vrm8, %1:gprnox0, 3, 0
+ %5:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1
+ %6:vrm8 = PseudoVADD_VV_M8 $noreg, %4:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0
+ PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1
+ $v8m8 = COPY %6:vrm8
+ PseudoRET implicit $v8m8
+...
+---
+name: consecutive_xsfmm
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: gprnox0 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$x10', virtual-reg: '%1' }
+ - { reg: '$x11', virtual-reg: '%2' }
+ - { reg: '$x12', virtual-reg: '%3' }
+ - { reg: '$x13', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $x10, $x11, $x12, $x13
+ ; CHECK-LABEL: name: consecutive_xsfmm
+ ; CHECK: liveins: $v8m8, $x10, $x11, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:gprnox0 = COPY $x13
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY2]], 1032, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY1]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY3]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY]], [[COPY]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY3]], 520, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoSF_VSTE16 [[COPY1]], [[COPY2]], $noreg, 4, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %0:vrm8 = COPY $v8m8
+ %1:gprnox0 = COPY $x10
+ %2:gprnox0 = COPY $x11
+ %3:gprnox0 = COPY $x12
+ %4:gprnox0 = COPY $x13
+ PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 2, implicit $frm
+ PseudoSF_VSTE16 %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 1
+ PseudoRET
+...
+---
+name: vsettnt_max
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: vsettnt_max
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gpr = PseudoSF_VSETTNTX0 killed $x0, 520, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_1:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ %2:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ %3:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %3:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: single_vsettm
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: single_vsettm
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gpr = PseudoSF_VSETTNTX0 killed $x0, 520, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %1:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: single_vsettn
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: single_vsettn
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[PseudoSF_VSETTNT:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNT [[COPY]], 520, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTNT]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTNT %0:gprnox0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %1:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: single_vsettk
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: single_vsettk
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gpr = PseudoSF_VSETTNTX0 killed $x0, 520, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTK]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %1:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: sf_vtzero
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+ - { id: 1, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ ; CHECK-LABEL: name: sf_vtzero
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1536, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY]], 3, 3, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_VTZERO_T $t1, $noreg, $noreg, 3, 4, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = COPY $x11
+ PseudoSF_VTZERO_T $t1, %0:gprnox0, %1:gprnox0, 3, 4
+ PseudoRET
+...
From 7bdbf86f50988c324a62ff51740d7efa6e02b5a4 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Mon, 7 Jul 2025 18:39:34 -0700
Subject: [PATCH 2/3] fixup! clang-format
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 88d1178eba9b1..45c8e040adcba 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1128,8 +1128,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertPt, DebugLoc DL,
- const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
+ MachineBasicBlock::iterator InsertPt,
+ DebugLoc DL, const VSETVLIInfo &Info,
+ const VSETVLIInfo &PrevInfo) {
++NumInsertedVSETVL;
if (Info.getTWiden()) {
From 12763e8e1962232b9449a1ef626b478cc3ae7e8f Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Mon, 7 Jul 2025 21:45:01 -0700
Subject: [PATCH 3/3] fixup! [RISCV] Add XSfmm pseudo instruction and vset*
insertion support
---
.../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 12 ++++-----
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 25 +++++++++++--------
llvm/lib/Target/RISCV/RISCVInstrFormats.td | 9 +++----
llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td | 7 +++++-
llvm/lib/Target/RISCV/RISCVInstrPredicates.td | 7 ++++--
5 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index e470d51c6c5fa..71b3df1397982 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -145,11 +145,8 @@ enum {
AltFmtTypeShift = DestEEWShift + 2,
AltFmtTypeMask = 3ULL << AltFmtTypeShift,
- IsWidenShift = AltFmtTypeShift + 2,
- IsWidenMask = 1ULL << IsWidenShift,
-
// XSfmmbase
- HasTWidenOpShift = IsWidenShift + 1,
+ HasTWidenOpShift = AltFmtTypeShift + 2,
HasTWidenOpMask = 1ULL << HasTWidenOpShift,
HasTMOpShift = HasTWidenOpShift + 1,
@@ -256,7 +253,7 @@ static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
// This method is only called if we expect to have a VL operand, and all
// instructions with VL also have SEW.
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
- // In Xsfmmbase, TN is alias for VL, so here we use the same TSFlags bit.
+ // In Xsfmmbase, TN is an alias for VL, so here we use the same TSFlags bit.
if (hasTWidenOp(TSFlags))
return getTNOpNum(Desc);
unsigned Offset = 2;
@@ -433,12 +430,13 @@ enum OperandType : unsigned {
OPERAND_SEW_MASK,
// Vector rounding mode for VXRM or FRM.
OPERAND_VEC_RM,
- OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM,
+ // Vtype operand for XSfmm extension.
+ OPERAND_XSFMM_VTYPE,
+ OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE,
// Operand is either a register or uimm5, this is used by V extension pseudo
// instructions to represent a value that be passed as AVL to either vsetvli
// or vsetivli.
OPERAND_AVL,
- OPERAND_XSFMM_VTYPE,
};
} // namespace RISCVOp
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 45c8e040adcba..88e49e255ce11 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -527,7 +527,7 @@ class VSETVLIInfo {
uint8_t MaskAgnostic : 1;
uint8_t SEWLMULRatioOnly : 1;
uint8_t AltFmt : 1;
- uint8_t TWiden = 0;
+ uint8_t TWiden : 3;
public:
VSETVLIInfo()
@@ -856,7 +856,7 @@ class VSETVLIInfo {
<< "TailAgnostic=" << (bool)TailAgnostic << ", "
<< "MaskAgnostic=" << (bool)MaskAgnostic << ", "
<< "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
- OS << "TWiden=" << (bool)TWiden << ", ";
+ OS << "TWiden=" << (unsigned)TWiden << ", ";
OS << "AltFmt=" << (bool)AltFmt << "}";
}
#endif
@@ -984,13 +984,15 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
} else if (RISCVInstrInfo::isXSfmmVectorTNConfigInstr(MI)) {
- Register ATReg = MI.getOperand(1).getReg();
+ assert(MI.getOpcode() == RISCV::PseudoSF_VSETTNT ||
+ MI.getOpcode() == RISCV::PseudoSF_VSETTNTX0);
switch (MI.getOpcode()) {
case RISCV::PseudoSF_VSETTNTX0:
NewInfo.setAVLVLMAX();
break;
case RISCV::PseudoSF_VSETTNT:
- NewInfo.setAVLRegDef(getVNInfoFromReg(ATReg, MI, LIS), ATReg);
+ Register ATNReg = MI.getOperand(1).getReg();
+ NewInfo.setAVLRegDef(getVNInfoFromReg(ATNReg, MI, LIS), ATNReg);
break;
}
} else {
@@ -1070,12 +1072,12 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
InstrInfo.setAVLVLMAX();
if (RISCVII::hasVLOp(TSFlags)) {
- const MachineOperand &TnOp =
+ const MachineOperand &TNOp =
MI.getOperand(RISCVII::getTNOpNum(MI.getDesc()));
- if (TnOp.getReg().isVirtual())
- InstrInfo.setAVLRegDef(getVNInfoFromReg(TnOp.getReg(), MI, LIS),
- TnOp.getReg());
+ if (TNOp.getReg().isVirtual())
+ InstrInfo.setAVLRegDef(getVNInfoFromReg(TNOp.getReg(), MI, LIS),
+ TNOp.getReg());
}
InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden);
@@ -1888,7 +1890,8 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
}
static void shrinkIntervalAndRemoveDeadMI(MachineOperand &MO,
- LiveIntervals *LIS) {
+ LiveIntervals *LIS,
+ const TargetInstrInfo *TII) {
Register Reg = MO.getReg();
MO.setReg(RISCV::NoRegister);
MO.setIsKill(false);
@@ -1906,6 +1909,8 @@ static void shrinkIntervalAndRemoveDeadMI(MachineOperand &MO,
// LIS->splitSeparateComponents(LI, SplitLIs);
for (MachineInstr *DeadMI : DeadMIs) {
+ if (!TII->isAddImmediate(*DeadMI, Reg))
+ continue;
LIS->RemoveMachineInstrFromMaps(*DeadMI);
DeadMI->eraseFromParent();
}
@@ -1956,7 +1961,7 @@ bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
if (LIS)
LIS->InsertMachineInstrInMaps(*TmpMI);
- shrinkIntervalAndRemoveDeadMI(MI.getOperand(OpNum), LIS);
+ shrinkIntervalAndRemoveDeadMI(MI.getOperand(OpNum), LIS, TII);
}
return Changed;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 81105b7815838..2fe0a2648d6b6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -264,18 +264,15 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
bits<2> AltFmtType = 0;
let TSFlags{27-26} = AltFmtType;
- bit IsWiden = 0;
- let TSFlags{28} = IsWiden;
-
// XSfmmbase
bit HasTWidenOp = 0;
- let TSFlags{29} = HasTWidenOp;
+ let TSFlags{28} = HasTWidenOp;
bit HasTmOp = 0;
- let TSFlags{30} = HasTmOp;
+ let TSFlags{29} = HasTmOp;
bit HasTkOp = 0;
- let TSFlags{31} = HasTkOp;
+ let TSFlags{30} = HasTkOp;
}
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
index 750cd89fb5eb8..83834734cbcf0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
@@ -363,7 +363,12 @@ let Defs = [VL, VTYPE] in {
PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
Sched<[WriteVSETVLI, ReadVSETVLI]>;
def PseudoSF_VSETTNTX0
- : Pseudo<(outs GPR:$rd),
+ : Pseudo<(outs GPRNoX0:$rd),
+ (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTNTX0X0
+ : Pseudo<(outs GPRX0:$rd),
(ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
Sched<[WriteVSETVLI, ReadVSETVLI]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 1a5c6167ee80a..f6d2ee487bc4e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -73,7 +73,8 @@ def isVectorConfigInstr
PseudoVSETVLIX0X0,
PseudoVSETIVLI,
PseudoSF_VSETTNT,
- PseudoSF_VSETTNTX0
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0
]>>>;
 // Returns true if this is a PseudoSF_VSETTNT* instruction.
@@ -82,7 +83,8 @@ def isXSfmmVectorTNConfigInstr
MCReturnStatement<
CheckOpcode<[
PseudoSF_VSETTNT,
- PseudoSF_VSETTNTX0
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0
]>>>;
// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK.
@@ -101,6 +103,7 @@ def isXSfmmVectorConfigInstr
CheckOpcode<[
PseudoSF_VSETTNT,
PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0,
PseudoSF_VSETTM,
PseudoSF_VSETTK
]>>>;