[llvm] [RISCV] Model dest EEW and fix peepholes not checking EEW (PR #105945)

Wed Aug 28 23:24:28 PDT 2024

https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/105945

>From 8fe51fa31b1f0f6aa338e1b60675b3f5e4b23c9e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 24 Aug 2024 20:25:14 +0800
Subject: [PATCH 1/3] Precommit test

---
 llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index b2526c6df6939e..e5cc17be91dbc1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -18,3 +18,12 @@ body: |
     %y:gpr = ADDI $x0, 1
     %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
 ...
+---
+name: different_eew
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: different_eew
+    ; CHECK: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, 4, 4 /* e16 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, 4, 4 /* e16 */, 0 /* tu, mu */
+    %y:vr = PseudoVMV_V_V_MF8 $noreg, %x, 4, 3 /* e8 */, 0 /* tu, mu */
+...

>From 2dc29ef50f166a43dae63d4d757bcac39fda5ecc Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 24 Aug 2024 23:41:28 +0800
Subject: [PATCH 2/3] [RISCV] Model dest EEW and fix peepholes not checking EEW

Previously, for vector peepholes that fold based on VL, we checked whether the VLMAX was the same as a proxy for checking that the EEWs were the same. This only worked at LMUL >= 1, where the EMULs of the Src output and the user's input had to be the same because the register classes needed to match.

At fractional LMULs we would have incorrectly folded something like this:

    %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, 4, 4 /* e16 */, 0
    %y:vr = PseudoVMV_V_V_MF8 $noreg, %x, 4, 3 /* e8 */, 0

This models the EEW of the destination operands of vector instructions with a TSFlag, which is enough to fix the incorrect folding.

There's some overlap with the TargetOverlapConstraintType and IsRVVWideningReduction. If we model the source operands as well we may be able to subsume them.
---
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |  8 +++
 llvm/lib/Target/RISCV/RISCVInstrFormats.td    | 11 ++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 12 ++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |  4 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td      | 57 +++++++++++--------
 llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td    |  9 ++-
 llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td  |  5 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td    |  3 +-
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 27 +++++++--
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.mir    |  1 +
 .../Target/RISCV/RISCVInstrInfoTest.cpp       | 21 +++++++
 11 files changed, 123 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index c65bd5b1d33631..4f973aa4eb21f1 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -126,6 +126,14 @@ enum {
 
   ActiveElementsAffectResultShift = TargetOverlapConstraintTypeShift + 2,
   ActiveElementsAffectResultMask = 1ULL << ActiveElementsAffectResultShift,
+
+  // Indicates the EEW of a vector instruction's destination operand.
+  // 0 -> 1
+  // 1 -> SEW
+  // 2 -> SEW * 2
+  // 3 -> SEW * 4
+  DestEEWShift = ActiveElementsAffectResultShift + 1,
+  DestEEWMask = 3ULL << DestEEWShift,
 };
 
 // Helper functions to read TSFlags.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 95f157064d73e2..d35f16df2503f5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -158,6 +158,14 @@ def OPC_SYSTEM    : RISCVOpcode<"SYSTEM",    0b1110011>;
 def OPC_OP_VE     : RISCVOpcode<"OP_VE",     0b1110111>;
 def OPC_CUSTOM_3  : RISCVOpcode<"CUSTOM_3",  0b1111011>;
 
+class EEW <bits<2> val> {
+  bits<2> Value = val;
+}
+def EEW1     : EEW<0>;
+def EEWSEWx1 : EEW<1>;
+def EEWSEWx2 : EEW<2>;
+def EEWSEWx4 : EEW<3>;
+
 class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
                    list<dag> pattern, InstFormat format> : Instruction {
   let Namespace = "RISCV";
@@ -226,6 +234,9 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
 
   bit ActiveElementsAffectResult = 0;
   let TSFlags{23} = ActiveElementsAffectResult;
+
+  EEW DestEEW = EEWSEWx1;
+  let TSFlags{25-24} = DestEEW.Value;
 }
 
 class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 9dd79027d7a162..1e170f94e20f02 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4002,3 +4002,15 @@ unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
     return 0;
   return RVV->BaseInstr;
 }
+
+unsigned RISCV::getDestEEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
+  unsigned DestEEW =
+      (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift;
+  // EEW = 1
+  if (DestEEW == 0)
+    return 1;
+  // EEW = SEW * n
+  unsigned Scaled = Log2SEW + (DestEEW - 1);
+  assert(Scaled >= 3 && Scaled <= 6);
+  return Scaled;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index ecb7982b3e5e36..e7b92a8d7c6440 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -351,6 +351,10 @@ std::optional<unsigned> getVectorLowDemandedScalarBits(uint16_t Opcode,
 // Returns the MC opcode of RVV pseudo instruction.
 unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode);
 
+// For a (non-pseudo) RVV instruction \p Desc and the given \p Log2SEW, returns
+// log2 of the destination operand's EEW, or 1 when the destination EEW is 1.
+unsigned getDestEEW(const MCInstrDesc &Desc, unsigned Log2SEW);
+
 // Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
 static constexpr int64_t VLMaxSentinel = -1LL;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index a84e92b0fda262..fc94c61c07b9b7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1104,7 +1104,7 @@ def : InstAlias<"vneg.v $vd, $vs", (VRSUB_VX VR:$vd, VR:$vs, X0, zero_reg)>;
 // The destination vector register group cannot overlap a source vector
 // register group of a different element width (including the mask register
 // if masked), otherwise an illegal instruction exception is raised.
-let Constraints = "@earlyclobber $vd" in {
+let Constraints = "@earlyclobber $vd", DestEEW = EEWSEWx2 in {
 let RVVConstraint = WidenV in {
 defm VWADDU_V : VALU_MV_V_X<"vwaddu", 0b110000, "v">;
 defm VWSUBU_V : VALU_MV_V_X<"vwsubu", 0b110010, "v">;
@@ -1121,7 +1121,7 @@ defm VWSUBU_W : VALU_MV_V_X<"vwsubu", 0b110110, "w">;
 defm VWADD_W : VALU_MV_V_X<"vwadd", 0b110101, "w">;
 defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">;
 } // RVVConstraint = WidenW
-} // Constraints = "@earlyclobber $vd"
+} // Constraints = "@earlyclobber $vd", DestEEW = EEWSEWx2
 
 def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm",
                 (VWADD_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
@@ -1147,10 +1147,11 @@ defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>;
 defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>;
 } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
 defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>;
-let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint,
+    DestEEW = EEW1 in {
 defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>;
 defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, DestEEW = EEW1
 
 // Vector Bitwise Logical Instructions
 defm VAND_V : VALU_IV_V_X_I<"vand", 0b001001>;
@@ -1183,7 +1184,7 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs",
                 (VNSRL_WX VR:$vd, VR:$vs, X0, zero_reg)>;
 
 // Vector Integer Comparison Instructions
-let RVVConstraint = NoConstraint in {
+let RVVConstraint = NoConstraint, DestEEW = EEW1 in {
 defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>;
 defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>;
 defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>;
@@ -1192,7 +1193,7 @@ defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>;
 defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>;
 defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>;
 defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>;
-} // RVVConstraint = NoConstraint
+} // RVVConstraint = NoConstraint, DestEEW = EEW1
 
 def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm",
                 (VMSLTU_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
@@ -1204,7 +1205,7 @@ def : InstAlias<"vmsge.vv $vd, $va, $vb$vm",
                 (VMSLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
 
 let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0,
-    mayStore = 0 in {
+    mayStore = 0, DestEEW = EEW1 in {
 // For unsigned comparisons we need to special case 0 immediate to maintain
 // the always true/false semantics we would invert if we just decremented the
 // immediate like we do for signed. To match the GNU assembler we will use
@@ -1227,7 +1228,7 @@ def PseudoVMSLT_VI : Pseudo<(outs VR:$vd),
 }
 
 let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0,
-    mayStore = 0 in {
+    mayStore = 0, DestEEW = EEW1 in {
 def PseudoVMSGEU_VX : Pseudo<(outs VR:$vd),
                              (ins VR:$vs2, GPR:$rs1),
                              [], "vmsgeu.vx", "$vd, $vs2, $rs1">;
@@ -1267,11 +1268,12 @@ defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>;
 defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>;
 
 // Vector Widening Integer Multiply Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
+    DestEEW = EEWSEWx2 in {
 defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>;
 defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>;
 defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, DestEEW = EEWSEWx2
 
 // Vector Single-Width Integer Multiply-Add Instructions
 defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>;
@@ -1280,10 +1282,12 @@ defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
 defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
 
 // Vector Widening Integer Multiply-Add Instructions
+let DestEEW = EEWSEWx2 in {
 defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
 defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
 defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
 defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
+} // DestEEW = EEWSEWx2
 
 // Vector Integer Merge Instructions
 defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
@@ -1342,7 +1346,8 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>;
 // Vector Widening Floating-Point Add/Subtract Instructions
 let Constraints = "@earlyclobber $vd",
     Uses = [FRM],
-    mayRaiseFPException = true in {
+    mayRaiseFPException = true,
+    DestEEW = EEWSEWx2 in {
 let RVVConstraint = WidenV in {
 defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000, "v">;
 defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010, "v">;
@@ -1355,7 +1360,7 @@ let RVVConstraint = WidenW in {
 defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">;
 defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">;
 } // RVVConstraint = WidenW
-} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true
+} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2
 
 // Vector Single-Width Floating-Point Multiply/Divide Instructions
 let Uses = [FRM], mayRaiseFPException = true in {
@@ -1366,9 +1371,9 @@ defm VFRDIV_V : VDIV_FV_F<"vfrdiv", 0b100001>;
 
 // Vector Widening Floating-Point Multiply
 let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
-    Uses = [FRM], mayRaiseFPException = true in {
+    Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in {
 defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2
 
 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
 let Uses = [FRM], mayRaiseFPException = true in {
@@ -1383,12 +1388,12 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
 }
 
 // Vector Widening Floating-Point Fused Multiply-Add Instructions
-let Uses = [FRM], mayRaiseFPException = true in {
+let Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in {
 defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
 defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
 defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
 defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true
+} // Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2
 
 // Vector Floating-Point Square-Root Instruction
 let Uses = [FRM], mayRaiseFPException = true in {
@@ -1420,14 +1425,14 @@ def : InstAlias<"vfabs.v $vd, $vs",
                 (VFSGNJX_VV VR:$vd, VR:$vs, VR:$vs, zero_reg)>;
 
 // Vector Floating-Point Compare Instructions
-let RVVConstraint = NoConstraint, mayRaiseFPException = true in {
+let RVVConstraint = NoConstraint, mayRaiseFPException = true, DestEEW = EEW1 in {
 defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>;
 defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>;
 defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>;
 defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>;
 defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
 defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
-} // RVVConstraint = NoConstraint, mayRaiseFPException = true
+} // RVVConstraint = NoConstraint, mayRaiseFPException = true, DestEEW = EEW1
 
 def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
                 (VMFLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
@@ -1471,7 +1476,7 @@ defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
 
 // Widening Floating-Point/Integer Type-Convert Instructions
 let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt,
-    mayRaiseFPException = true in {
+    mayRaiseFPException = true, DestEEW = EEWSEWx2 in {
 let Uses = [FRM] in {
 defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
 defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
@@ -1481,7 +1486,7 @@ defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
 defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
 defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
 defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt, mayRaiseFPException = true, DestEEW = EEWSEWx2
 
 // Narrowing Floating-Point/Integer Type-Convert Instructions
 let Constraints = "@earlyclobber $vd", mayRaiseFPException = true in {
@@ -1515,14 +1520,14 @@ defm VREDXOR  : VRED_MV_V<"vredxor", 0b000011>;
 } // RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1
 
 // Vector Widening Integer Reduction Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1 in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1, DestEEW = EEWSEWx2 in {
 // Set earlyclobber for following instructions for second and mask operands.
 // This has the downside that the earlyclobber constraint is too coarse and
 // will impose unnecessary restrictions by not allowing the destination to
 // overlap with the first (wide) operand.
 defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>;
 defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1, DestEEW = EEWSEWx2
 
 } // Predicates = [HasVInstructions]
 
@@ -1543,7 +1548,7 @@ def : InstAlias<"vfredsum.vs $vd, $vs2, $vs1$vm",
                 (VFREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
 
 // Vector Widening Floating-Point Reduction Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1 in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1, DestEEW = EEWSEWx2 in {
 // Set earlyclobber for following instructions for second and mask operands.
 // This has the downside that the earlyclobber constraint is too coarse and
 // will impose unnecessary restrictions by not allowing the destination to
@@ -1552,7 +1557,7 @@ let Uses = [FRM], mayRaiseFPException = true in {
 defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>;
 defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>;
 }
-} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1
+} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ActiveElementsAffectResult = 1, DestEEW = EEWSEWx2
 
 def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm",
                 (VFWREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
@@ -1560,7 +1565,7 @@ def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm",
 
 let Predicates = [HasVInstructions] in {
 // Vector Mask-Register Logical Instructions
-let RVVConstraint = NoConstraint in {
+let RVVConstraint = NoConstraint, DestEEW = EEW1 in {
 defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">;
 defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">;
 defm VMANDN_M : VMALU_MV_Mask<"vmandn", 0b011000, "m">;
@@ -1607,12 +1612,14 @@ def : InstAlias<"vpopc.m $vd, $vs2$vm",
 
 let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ActiveElementsAffectResult = 1 in {
 
+let DestEEW = EEW1 in {
 // vmsbf.m set-before-first mask bit
 defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>;
 // vmsif.m set-including-first mask bit
 defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>;
 // vmsof.m set-only-first mask bit
 defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>;
+} // DestEEW = EEW1
 // Vector Iota Instruction
 defm VIOTA_M : VIOTA_MV_V<"viota.m", 0b010100, 0b10000>;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index bd5319af80ffc8..c1edbcd33b2dfe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -201,21 +201,24 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
   defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR,    VR,    FPR32>, Sched<[]>;
 }
 
-let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod" in {
+let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod",
+    DestEEW = EEWSEWx4 in {
   def VQMACCU_2x8x2  : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">;
   def VQMACC_2x8x2   : CustomSiFiveVMACC<0b101101, OPMVV, "sf.vqmacc.2x8x2">;
   def VQMACCUS_2x8x2 : CustomSiFiveVMACC<0b101110, OPMVV, "sf.vqmaccus.2x8x2">;
   def VQMACCSU_2x8x2 : CustomSiFiveVMACC<0b101111, OPMVV, "sf.vqmaccsu.2x8x2">;
 }
 
-let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in {
+let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq",
+    DestEEW = EEWSEWx4 in {
   def VQMACCU_4x8x4  : CustomSiFiveVMACC<0b111100, OPMVV, "sf.vqmaccu.4x8x4">;
   def VQMACC_4x8x4   : CustomSiFiveVMACC<0b111101, OPMVV, "sf.vqmacc.4x8x4">;
   def VQMACCUS_4x8x4 : CustomSiFiveVMACC<0b111110, OPMVV, "sf.vqmaccus.4x8x4">;
   def VQMACCSU_4x8x4 : CustomSiFiveVMACC<0b111111, OPMVV, "sf.vqmaccsu.4x8x4">;
 }
 
-let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq",
+    DestEEW = EEWSEWx2 in {
   def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 1b1f3b9b16e44f..a79f757753325c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -19,7 +19,7 @@
 
 let Predicates = [HasStdExtZvfbfmin], Constraints = "@earlyclobber $vd",
     mayRaiseFPException = true in {
-let RVVConstraint = WidenCvt in
+let RVVConstraint = WidenCvt, DestEEW = EEWSEWx2 in
 defm VFWCVTBF16_F_F_V : VWCVTF_FV_VS2<"vfwcvtbf16.f.f.v", 0b010010, 0b01101>;
 let Uses = [FRM] in
 defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>;
@@ -27,6 +27,7 @@ defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>;
 
 let Predicates = [HasStdExtZvfbfwma],
     Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb",
-    RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in {
+    RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true,
+    DestEEW = EEWSEWx2 in {
 defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index cafd259031746d..e96d4cce7d89c2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -123,7 +123,8 @@ let Predicates = [HasStdExtZvbb] in {
   def  VCLZ_V   : VALUVs2<0b010010, 0b01100, OPMVV, "vclz.v">;
   def  VCPOP_V  : VALUVs2<0b010010, 0b01110, OPMVV, "vcpop.v">;
   def  VCTZ_V   : VALUVs2<0b010010, 0b01101, OPMVV, "vctz.v">;
-  let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in
+  let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
+      DestEEW = EEWSEWx2 in
   defm VWSLL_V  : VSHT_IV_V_X_I<"vwsll", 0b110101>;
 } // Predicates = [HasStdExtZvbb]
 
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 822ab492c710b4..e53c45d3ba247c 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -68,6 +68,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
   bool convertVMergeToVMv(MachineInstr &MI) const;
   bool foldVMV_V_V(MachineInstr &MI);
 
+  bool hasSameEEWVLMAX(const MachineInstr &User, const MachineInstr &Src) const;
   bool isAllOnesMask(const MachineInstr *MaskDef) const;
   std::optional<unsigned> getConstant(const MachineOperand &VL) const;
 
@@ -102,6 +103,24 @@ static unsigned getSEWLMULRatio(const MachineInstr &MI) {
   return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
 }
 
+/// Given \p User that has an input operand with EEW=SEW, which uses an output
+/// operand of \p Src with an unknown EEW, return true if their EEWs match and
+/// they have the same VLMAX.
+bool RISCVVectorPeephole::hasSameEEWVLMAX(const MachineInstr &User,
+                                          const MachineInstr &Src) const {
+  if (getSEWLMULRatio(User) != getSEWLMULRatio(Src))
+    return false;
+  unsigned UserLog2SEW =
+      User.getOperand(RISCVII::getSEWOpNum(User.getDesc())).getImm();
+  unsigned SrcLog2SEW =
+      Src.getOperand(RISCVII::getSEWOpNum(Src.getDesc())).getImm();
+  if (RISCV::getDestEEW(TII->get(RISCV::getRVVMCOpcode(Src.getOpcode())),
+                        SrcLog2SEW) != UserLog2SEW)
+    return false;
+
+  return true;
+}
+
 // Attempt to reduce the VL of an instruction whose sole use is feeding a
 // instruction with a narrower VL.  This currently works backwards from the
 // user instruction (which might have a smaller VL).
@@ -149,8 +168,8 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
       !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
     return false;
 
-  // Src needs to have the same VLMAX as MI
-  if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src))
+  // Src needs to have the same VLMAX and EEW as MI
+  if (!hasSameEEWVLMAX(MI, *Src))
     return false;
 
   bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
@@ -480,8 +499,8 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
       !RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags))
     return false;
 
-  // Src needs to have the same VLMAX as MI
-  if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src))
+  // Src needs to have the same VLMAX and EEW as MI
+  if (!hasSameEEWVLMAX(MI, *Src))
     return false;
 
   // Src needs to have the same passthru as VMV_V_V
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index e5cc17be91dbc1..ad7b6019858a7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -24,6 +24,7 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: different_eew
     ; CHECK: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, 4, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVMV_V_V_MF8 $noreg, %x, 4, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, 4, 4 /* e16 */, 0 /* tu, mu */
     %y:vr = PseudoVMV_V_V_MF8 $noreg, %x, 4, 3 /* e8 */, 0 /* tu, mu */
 ...
diff --git a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp
index fe711619c63203..67a1ae44c46967 100644
--- a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp
+++ b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp
@@ -316,6 +316,27 @@ TEST_P(RISCVInstrInfoTest, DescribeLoadedValue) {
   MF->deleteMachineBasicBlock(MBB);
 }
 
+TEST_P(RISCVInstrInfoTest, GetDestEEW) {
+  const RISCVInstrInfo *TII = ST->getInstrInfo();
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VADD_VV), 3), 3u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VWADD_VV), 3), 4u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VLE32_V), 5), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VLSE32_V), 5), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VREDSUM_VS), 4), 4u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VWREDSUM_VS), 4), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VFWREDOSUM_VS), 5), 6u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VFCVT_RTZ_XU_F_V), 4), 4u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VFWCVT_RTZ_XU_F_V), 4), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VSLL_VI), 4), 4u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VWSLL_VI), 4), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VMSEQ_VV), 4), 1u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VMAND_MM), 0), 1u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VIOTA_M), 3), 3u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VQMACCU_2x8x2), 3), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::VFWMACC_4x4x4), 4), 5u);
+  EXPECT_EQ(RISCV::getDestEEW(TII->get(RISCV::THVdotVMAQA_VV), 5), 5u);
+}
+
 } // namespace
 
 INSTANTIATE_TEST_SUITE_P(RV32And64, RISCVInstrInfoTest,

>From 5a764595f0dbe040732099a516c8620fd16a6d56 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 29 Aug 2024 14:24:05 +0800
Subject: [PATCH 3/3] Don't check ratio

---
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 39 +++++++------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index e53c45d3ba247c..5c86a38f6f3f99 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -68,7 +68,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
   bool convertVMergeToVMv(MachineInstr &MI) const;
   bool foldVMV_V_V(MachineInstr &MI);
 
-  bool hasSameEEWVLMAX(const MachineInstr &User, const MachineInstr &Src) const;
+  bool hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const;
   bool isAllOnesMask(const MachineInstr *MaskDef) const;
   std::optional<unsigned> getConstant(const MachineOperand &VL) const;
 
@@ -97,28 +97,17 @@ static bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
   return LHS.getImm() <= RHS.getImm();
 }
 
-static unsigned getSEWLMULRatio(const MachineInstr &MI) {
-  RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
-  unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
-  return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
-}
-
-/// Given \p User that has an input operand with EEW=SEW, which uses an output
-/// operand of \p Src with an unknown EEW, return true if their EEWs match and
-/// they have the same VLMAX.
-bool RISCVVectorPeephole::hasSameEEWVLMAX(const MachineInstr &User,
-                                          const MachineInstr &Src) const {
-  if (getSEWLMULRatio(User) != getSEWLMULRatio(Src))
-    return false;
-  unsigned UserLog2SEW =
+/// Given \p User that has an input operand with EEW=SEW, which uses the dest
+/// operand of \p Src with an unknown EEW, return true if their EEWs match.
+bool RISCVVectorPeephole::hasSameEEW(const MachineInstr &User,
+                                     const MachineInstr &Src) const {
+  unsigned UserSEW =
       User.getOperand(RISCVII::getSEWOpNum(User.getDesc())).getImm();
-  unsigned SrcLog2SEW =
+  unsigned SrcSEW =
       Src.getOperand(RISCVII::getSEWOpNum(Src.getDesc())).getImm();
-  if (RISCV::getDestEEW(TII->get(RISCV::getRVVMCOpcode(Src.getOpcode())),
-                        SrcLog2SEW) != UserLog2SEW)
-    return false;
-
-  return true;
+  unsigned SrcEEW = RISCV::getDestEEW(
+      TII->get(RISCV::getRVVMCOpcode(Src.getOpcode())), SrcSEW);
+  return SrcEEW == UserSEW;
 }
 
 // Attempt to reduce the VL of an instruction whose sole use is feeding a
@@ -168,8 +157,8 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
       !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
     return false;
 
-  // Src needs to have the same VLMAX and EEW as MI
-  if (!hasSameEEWVLMAX(MI, *Src))
+  // Src's dest needs to have the same EEW as MI's input.
+  if (!hasSameEEW(MI, *Src))
     return false;
 
   bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
@@ -499,8 +488,8 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
       !RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags))
     return false;
 
-  // Src needs to have the same VLMAX and EEW as MI
-  if (!hasSameEEWVLMAX(MI, *Src))
+  // Src's dest needs to have the same EEW as MI's input.
+  if (!hasSameEEW(MI, *Src))
     return false;
 
   // Src needs to have the same passthru as VMV_V_V