[llvm] c579ab5 - [RISCV] Move vfma_vl+fneg_vl matching to DAG combine.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 24 00:01:11 PDT 2022


Author: Craig Topper
Date: 2022-06-24T00:00:37-07:00
New Revision: c579ab53bd4ad8563fb9ae3b3e0267c3a51b27d9

URL: https://github.com/llvm/llvm-project/commit/c579ab53bd4ad8563fb9ae3b3e0267c3a51b27d9
DIFF: https://github.com/llvm/llvm-project/commit/c579ab53bd4ad8563fb9ae3b3e0267c3a51b27d9.diff

LOG: [RISCV] Move vfma_vl+fneg_vl matching to DAG combine.

This patch renames RISCVISD::FMA_VL to VFMADD_VL and adds 3 new _VL
RISCVISD opcodes (VFNMADD_VL, VFMSUB_VL, and VFNMSUB_VL) to represent
the FMA with the multiply result and/or the accumulator negated. It
also adds a DAG combine that peeks through FNEG_VL to create these new
opcodes.
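
As a sketch of the folds this enables (operand names are illustrative;
per the combine below, a fold only fires when the FNEG_VL uses the same
mask and VL as the FMA node):

  VFMADD_VL (FNEG_VL a), b, c           -> VFNMSUB_VL a, b, c  ; -(a*b)+c
  VFMADD_VL a, b, (FNEG_VL c)           -> VFMSUB_VL a, b, c   ; (a*b)-c
  VFMADD_VL (FNEG_VL a), (FNEG_VL b), c -> VFMADD_VL a, b, c   ; negations cancel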

This is modeled after similar code from X86.

This makes the isel patterns more regular and reduces the size of
the isel table by ~37K.

The test changes look like regressions, but they point to a bug that
was already there. We aren't able to commute a masked FMA instruction
to improve register allocation because we always use a mask-undisturbed
policy. Prior to this patch we matched the two multiply operands in a
different order, which hid this issue for these test cases, but a
different test could still have encountered it.
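
For example, in the masked vfnmadd_vv test cases below, the result can
no longer be written directly into v8 by commuting, so an extra vector
move appears:

  vfnmadd.vv v9, v8, v10, v0.t
  vmv1r.v v8, v9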

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D128310

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
    llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0a4cd380fb02f..b3ac308f1dd18 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3488,7 +3488,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::FSQRT:
     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
   case ISD::FMA:
-    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
+    return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
   case ISD::SMIN:
     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
   case ISD::SMAX:
@@ -3563,7 +3563,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VP_FNEG:
     return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
   case ISD::VP_FMA:
-    return lowerVPOp(Op, DAG, RISCVISD::FMA_VL);
+    return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
   case ISD::VP_SIGN_EXTEND:
   case ISD::VP_ZERO_EXTEND:
     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
@@ -8490,6 +8490,40 @@ static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
                      DAG.getConstant(7, DL, VT));
 }
 
+// Convert from one FMA opcode to another based on whether we are negating the
+// multiply result and/or the accumulator.
+// NOTE: Only supports RVV operations with VL.
+static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
+  assert((NegMul || NegAcc) && "Not negating anything?");
+
+  // Negating the multiply result changes ADD<->SUB and toggles 'N'.
+  if (NegMul) {
+    // clang-format off
+    switch (Opcode) {
+    default: llvm_unreachable("Unexpected opcode");
+    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
+    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
+    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
+    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
+    }
+    // clang-format on
+  }
+
+  // Negating the accumulator changes ADD<->SUB.
+  if (NegAcc) {
+    // clang-format off
+    switch (Opcode) {
+    default: llvm_unreachable("Unexpected opcode");
+    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
+    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
+    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
+    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+    }
+    // clang-format on
+  }
+
+  return Opcode;
+}
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -8996,6 +9030,40 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return V;
     // Mul is commutative.
     return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
+  case RISCVISD::VFMADD_VL:
+  case RISCVISD::VFNMADD_VL:
+  case RISCVISD::VFMSUB_VL:
+  case RISCVISD::VFNMSUB_VL: {
+    // Fold FNEG_VL into FMA opcodes.
+    SDValue A = N->getOperand(0);
+    SDValue B = N->getOperand(1);
+    SDValue C = N->getOperand(2);
+    SDValue Mask = N->getOperand(3);
+    SDValue VL = N->getOperand(4);
+
+    auto invertIfNegative = [&Mask, &VL](SDValue &V) {
+      if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
+          V.getOperand(2) == VL) {
+        // Return the negated input.
+        V = V.getOperand(0);
+        return true;
+      }
+
+      return false;
+    };
+
+    bool NegA = invertIfNegative(A);
+    bool NegB = invertIfNegative(B);
+    bool NegC = invertIfNegative(C);
+
+    // If no operands are negated, we're done.
+    if (!NegA && !NegB && !NegC)
+      return SDValue();
+
+    unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
+    return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
+                       VL);
+  }
   case ISD::STORE: {
     auto *Store = cast<StoreSDNode>(N);
     SDValue Val = Store->getValue();
@@ -11295,7 +11363,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(FNEG_VL)
   NODE_NAME_CASE(FABS_VL)
   NODE_NAME_CASE(FSQRT_VL)
-  NODE_NAME_CASE(FMA_VL)
+  NODE_NAME_CASE(VFMADD_VL)
+  NODE_NAME_CASE(VFNMADD_VL)
+  NODE_NAME_CASE(VFMSUB_VL)
+  NODE_NAME_CASE(VFNMSUB_VL)
   NODE_NAME_CASE(FCOPYSIGN_VL)
   NODE_NAME_CASE(SMIN_VL)
   NODE_NAME_CASE(SMAX_VL)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index dfb2ba2f0f2c6..eb013d4b6682b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -242,7 +242,10 @@ enum NodeType : unsigned {
   FNEG_VL,
   FABS_VL,
   FSQRT_VL,
-  FMA_VL,
+  VFMADD_VL,
+  VFNMADD_VL,
+  VFMSUB_VL,
+  VFNMSUB_VL,
   FCOPYSIGN_VL,
   SMIN_VL,
   SMAX_VL,

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 03e7c2aeccd07..081f61617d593 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -104,7 +104,10 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
                                               SDTCVecEltisVT<4, i1>,
                                               SDTCisSameNumEltsAs<0, 4>,
                                               SDTCisVT<5, XLenVT>]>;
-def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmadd_vl : SDNode<"RISCVISD::VFMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfnmadd_vl : SDNode<"RISCVISD::VFNMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmsub_vl : SDNode<"RISCVISD::VFMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfnmsub_vl : SDNode<"RISCVISD::VFNMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
 
 def SDT_RISCVFPRoundOp_VL  : SDTypeProfile<1, 3, [
   SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>,
@@ -903,11 +906,44 @@ multiclass VPatNarrowShiftSplat_WX_WI<SDNode op, string instruction_name> {
   }
 }
 
-multiclass VPatWidenFPMulAccVL_VV_VF<string instruction_name> {
+multiclass VPatFPMulAddVL_VV_VF<SDNode vop, string instruction_name> {
+  foreach vti = AllFloatVectors in {
+  defvar suffix = vti.LMul.MX;
+  def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
+                             vti.RegClass:$rs2, (vti.Mask true_mask),
+                             VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_VV_"# suffix)
+                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+  def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
+                             vti.RegClass:$rs2, (vti.Mask V0),
+                             VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+  def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
+                             vti.RegClass:$rd, vti.RegClass:$rs2,
+                             (vti.Mask true_mask),
+                             VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix)
+                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+  def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
+                             vti.RegClass:$rd, vti.RegClass:$rs2,
+                             (vti.Mask V0),
+                             VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+  }
+}
+
+multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> {
   foreach vtiToWti = AllWidenableFloatVectors in {
     defvar vti = vtiToWti.Vti;
     defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
+    def : Pat<(vop
                    (wti.Vector (riscv_fpextend_vl_oneuse
                                     (vti.Vector vti.RegClass:$rs1),
                                     (vti.Mask true_mask), VLOpFrag)),
@@ -919,7 +955,7 @@ multiclass VPatWidenFPMulAccVL_VV_VF<string instruction_name> {
               (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
                  wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                  GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
+    def : Pat<(vop
                    (wti.Vector (riscv_fpextend_vl_oneuse
                                     (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
                                     (vti.Mask true_mask), VLOpFrag)),
@@ -934,131 +970,6 @@ multiclass VPatWidenFPMulAccVL_VV_VF<string instruction_name> {
   }
 }
 
-multiclass VPatWidenFPNegMulAccVL_VV_VF<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl 
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (wti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fpextend_vl_oneuse
-                        (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                        (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl
-                        (wti.Vector (riscv_fpextend_vl_oneuse
-                                         (vti.Vector vti.RegClass:$rs2),
-                                         (vti.Mask true_mask), VLOpFrag)),
-                        (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                                   (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  }
-}
-
-multiclass VPatWidenFPMulSacVL_VV_VF<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
-                   (wti.Vector (riscv_fpextend_vl_oneuse
-                                    (vti.Vector vti.RegClass:$rs1),
-                                    (vti.Mask true_mask), VLOpFrag)),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (vti.Mask true_mask),
-                   VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (wti.Vector (riscv_fpextend_vl_oneuse
-                                    (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                    (vti.Mask true_mask), VLOpFrag)),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (vti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  }
-}
-
-multiclass VPatWidenFPNegMulSacVL_VV_VF<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                        (wti.Vector (riscv_fpextend_vl_oneuse
-                                         (vti.Vector vti.RegClass:$rs1),
-                                         (vti.Mask true_mask), VLOpFrag)),
-                        (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   wti.RegClass:$rd, (wti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (wti.Vector (riscv_fpextend_vl_oneuse
-                                    (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                    (vti.Mask true_mask), VLOpFrag)),
-                   (riscv_fneg_vl
-                        (wti.Vector (riscv_fpextend_vl_oneuse
-                                         (vti.Vector vti.RegClass:$rs2), 
-                                         (vti.Mask true_mask), VLOpFrag)),
-                                  (wti.Mask true_mask), VLOpFrag),
-                   wti.RegClass:$rd, (wti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                        (wti.Vector (riscv_fpextend_vl_oneuse
-                                         (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), 
-                                         (vti.Mask true_mask), VLOpFrag)),
-                        (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   wti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -1357,221 +1268,16 @@ defm : VPatBinaryFPVL_R_VF<riscv_fdiv_vl, "PseudoVFRDIV">;
 defm : VPatWidenBinaryFPVL_VV_VF<riscv_fmul_vl, riscv_fpextend_vl_oneuse, "PseudoVFWMUL">;
 
 // 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
-foreach vti = AllFloatVectors in {
-  // NOTE: We choose VFMADD because it has the most commuting freedom. So it
-  // works best with how TwoAddressInstructionPass tries commuting.
-  defvar suffix = vti.LMul.MX;
-  def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                      vti.RegClass:$rs2, (vti.Mask true_mask),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                      vti.RegClass:$rs2, (vti.Mask V0),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMADD_VV_"# suffix #"_MASK")
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                      (riscv_fneg_vl vti.RegClass:$rs2,
-                                                     (vti.Mask true_mask),
-                                                     VLOpFrag),
-                                      (vti.Mask true_mask),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                      (riscv_fneg_vl vti.RegClass:$rs2,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      (vti.Mask V0),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix #"_MASK")
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      vti.RegClass:$rd,
-                                      (riscv_fneg_vl vti.RegClass:$rs2,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      (vti.Mask true_mask),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      vti.RegClass:$rd,
-                                      (riscv_fneg_vl vti.RegClass:$rs2,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      (vti.Mask V0),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix #"_MASK")
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      vti.RegClass:$rd, vti.RegClass:$rs2,
-                                      (vti.Mask true_mask),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                      vti.RegClass:$rd, vti.RegClass:$rs2,
-                                      (vti.Mask V0),
-                                      VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix #"_MASK")
-                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
-  // commutable.
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       vti.RegClass:$rd, vti.RegClass:$rs2,
-                                       (vti.Mask true_mask),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix)
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       vti.RegClass:$rd, vti.RegClass:$rs2,
-                                       (vti.Mask V0),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       vti.RegClass:$rd,
-                                       (riscv_fneg_vl vti.RegClass:$rs2,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (vti.Mask true_mask),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix)
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       vti.RegClass:$rd,
-                                       (riscv_fneg_vl vti.RegClass:$rs2,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (vti.Mask V0),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       (riscv_fneg_vl vti.RegClass:$rd,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (riscv_fneg_vl vti.RegClass:$rs2,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (vti.Mask true_mask),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       (riscv_fneg_vl vti.RegClass:$rd,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (riscv_fneg_vl vti.RegClass:$rs2,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (vti.Mask V0),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       (riscv_fneg_vl vti.RegClass:$rd,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       vti.RegClass:$rs2,
-                                       (vti.Mask true_mask),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                       (riscv_fneg_vl vti.RegClass:$rd,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       vti.RegClass:$rs2,
-                                       (vti.Mask V0),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  // The splat might be negated.
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                       vti.RegClass:$rd,
-                                       (riscv_fneg_vl vti.RegClass:$rs2,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (vti.Mask true_mask),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                       vti.RegClass:$rd,
-                                       (riscv_fneg_vl vti.RegClass:$rs2,
-                                                      (vti.Mask srcvalue),
-                                                      VLOpFrag),
-                                       (vti.Mask V0),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                       vti.RegClass:$rd, vti.RegClass:$rs2,
-                                       (vti.Mask true_mask),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                                     (vti.Mask srcvalue),
-                                                     VLOpFrag),
-                                       vti.RegClass:$rd, vti.RegClass:$rs2,
-                                       (vti.Mask V0),
-                                       VLOpFrag)),
-            (!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-}
+defm : VPatFPMulAddVL_VV_VF<riscv_vfmadd_vl,  "PseudoVFMADD">;
+defm : VPatFPMulAddVL_VV_VF<riscv_vfmsub_vl,  "PseudoVFMSUB">;
+defm : VPatFPMulAddVL_VV_VF<riscv_vfnmadd_vl, "PseudoVFNMADD">;
+defm : VPatFPMulAddVL_VV_VF<riscv_vfnmsub_vl, "PseudoVFNMSUB">;
 
 // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-defm : VPatWidenFPMulAccVL_VV_VF<"PseudoVFWMACC">;
-defm : VPatWidenFPNegMulAccVL_VV_VF<"PseudoVFWNMACC">;
-defm : VPatWidenFPMulSacVL_VV_VF<"PseudoVFWMSAC">;
-defm : VPatWidenFPNegMulSacVL_VV_VF<"PseudoVFWNMSAC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfmadd_vl, "PseudoVFWMACC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfnmadd_vl, "PseudoVFWNMACC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfmsub_vl, "PseudoVFWMSAC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfnmsub_vl, "PseudoVFWNMSAC">;
 
 // 14.11. Vector Floating-Point MIN/MAX Instructions
 defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">;

diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 86cdf2b5b6403..fdb42f87ea457 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -1466,7 +1466,8 @@ define <vscale x 1 x half> @vfnmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; CHECK-LABEL: vfnmadd_vv_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -1638,7 +1639,8 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; CHECK-LABEL: vfnmsub_vv_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -1885,7 +1887,8 @@ define <vscale x 2 x half> @vfnmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; CHECK-LABEL: vfnmadd_vv_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -2057,7 +2060,8 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; CHECK-LABEL: vfnmsub_vv_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -2304,7 +2308,8 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; CHECK-LABEL: vfnmadd_vv_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -2476,7 +2481,8 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; CHECK-LABEL: vfnmsub_vv_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -2723,7 +2729,8 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: vfnmadd_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -2895,7 +2902,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: vfnmsub_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -3142,7 +3150,8 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: vfnmadd_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -3314,7 +3323,8 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: vfnmsub_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -3564,7 +3574,8 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
   %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3740,7 +3751,8 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
   %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3990,7 +4002,8 @@ define <vscale x 1 x float> @vfnmadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscal
 ; CHECK-LABEL: vfnmadd_vv_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -4162,7 +4175,8 @@ define <vscale x 1 x float> @vfnmsub_vv_nxv1f32(<vscale x 1 x float> %va, <vscal
 ; CHECK-LABEL: vfnmsub_vv_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -4409,7 +4423,8 @@ define <vscale x 2 x float> @vfnmadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscal
 ; CHECK-LABEL: vfnmadd_vv_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -4581,7 +4596,8 @@ define <vscale x 2 x float> @vfnmsub_vv_nxv2f32(<vscale x 2 x float> %va, <vscal
 ; CHECK-LABEL: vfnmsub_vv_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -4828,7 +4844,8 @@ define <vscale x 4 x float> @vfnmadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vfnmadd_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -5000,7 +5017,8 @@ define <vscale x 4 x float> @vfnmsub_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vfnmsub_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -5247,7 +5265,8 @@ define <vscale x 8 x float> @vfnmadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vfnmadd_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -5419,7 +5438,8 @@ define <vscale x 8 x float> @vfnmsub_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vfnmsub_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -5669,7 +5689,8 @@ define <vscale x 16 x float> @vfnmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vs
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -5845,7 +5866,8 @@ define <vscale x 16 x float> @vfnmsub_vv_nxv16f32(<vscale x 16 x float> %va, <vs
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -6095,7 +6117,8 @@ define <vscale x 1 x double> @vfnmadd_vv_nxv1f64(<vscale x 1 x double> %va, <vsc
 ; CHECK-LABEL: vfnmadd_vv_nxv1f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -6267,7 +6290,8 @@ define <vscale x 1 x double> @vfnmsub_vv_nxv1f64(<vscale x 1 x double> %va, <vsc
 ; CHECK-LABEL: vfnmsub_vv_nxv1f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -6514,7 +6538,8 @@ define <vscale x 2 x double> @vfnmadd_vv_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfnmadd_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -6686,7 +6711,8 @@ define <vscale x 2 x double> @vfnmsub_vv_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfnmsub_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -6933,7 +6959,8 @@ define <vscale x 4 x double> @vfnmadd_vv_nxv4f64(<vscale x 4 x double> %va, <vsc
 ; CHECK-LABEL: vfnmadd_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -7105,7 +7132,8 @@ define <vscale x 4 x double> @vfnmsub_vv_nxv4f64(<vscale x 4 x double> %va, <vsc
 ; CHECK-LABEL: vfnmsub_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -7355,7 +7383,8 @@ define <vscale x 8 x double> @vfnmadd_vv_nxv8f64(<vscale x 8 x double> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -7531,7 +7560,8 @@ define <vscale x 8 x double> @vfnmsub_vv_nxv8f64(<vscale x 8 x double> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
