[llvm] [RISCV][VLOPT] Add support for checkUsers when UserMI is a Single-Width Integer Reduction (PR #120345)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 3 05:59:55 PST 2025


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/120345

>From eae73449bbe3f5e63f9d32954a9798924e5c8789 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 17:50:34 -0800
Subject: [PATCH 01/18] [RISCV][VLOPT] Add support for checkUsers when UserMI
 is an reduction

Reductions are weird because for some operands, they are vector registers but
only read the first lane. For these operands, we do not need to check to make
sure the EEW and EMUL ratios match. However, we need to make sure that when
the reduction instruction has a non-zero VL operand, we don't try and set the
CommonVL=0. Since this is an edge case, we decide just not to optimize anything
when this occurs.
---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    | 42 +++++++++++++++++--
 .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 19 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir        | 15 ++++++-
 3 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 85ea5a23e8f293..924a8b0086e029 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -621,6 +621,23 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
     return OperandInfo(MIVLMul, MILog2SEW);
   }
 
+  // Vector Reduction Operations
+  // Vector Single-Width Integer Reduction Instructions
+  // The Dest and VS1 only read element 0 of the vector register. Return unknown
+  // for these. VS2 has EEW=SEW and EMUL=LMUL.
+  case RISCV::VREDAND_VS:
+  case RISCV::VREDMAX_VS:
+  case RISCV::VREDMAXU_VS:
+  case RISCV::VREDMIN_VS:
+  case RISCV::VREDMINU_VS:
+  case RISCV::VREDOR_VS:
+  case RISCV::VREDSUM_VS:
+  case RISCV::VREDXOR_VS: {
+    if (MO.getOperandNo() == 2)
+      return OperandInfo(MIVLMul, MILog2SEW);
+    return {};
+  }
+
   default:
     return {};
   }
@@ -943,11 +960,28 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
 
     // Instructions like reductions may use a vector register as a scalar
     // register. In this case, we should treat it like a scalar register which
-    // does not impact the decision on whether to optimize VL.
-    // TODO: Treat it like a scalar register instead of bailing out.
+    // does not impact the decision on whether to optimize VL. But if there is
+    // another user of MI and it has VL=0, we need to be sure not to reduce the
+    // VL of MI to zero when the VLOp of UserOp is may be non-zero.
     if (isVectorOpUsedAsScalarOp(UserOp)) {
-      CanReduceVL = false;
-      break;
+      [[maybe_unused]] Register R = UserOp.getReg();
+      [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
+      assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
+             "Expect LMUL 1 register class for vector as scalar operands!");
+      LLVM_DEBUG(dbgs() << "    Used this operand as a scalar operand\n");
+      const MCInstrDesc &Desc = UserMI.getDesc();
+      unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+      const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+      if ((VLOp.isReg() && VLOp.getReg() != RISCV::X0) ||
+          (VLOp.isImm() && VLOp.getImm() != 0)) {
+        if (!CommonVL) {
+          CommonVL = &VLOp;
+          continue;
+        } else if (!CommonVL->isIdenticalTo(VLOp)) {
+          CanReduceVL = false;
+          break;
+        }
+      }
     }
 
     if (mayReadPastVL(UserMI)) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index b304769b27731f..75abf775b206a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1064,3 +1064,22 @@ body: |
     %x:vr = PseudoVMAND_MM_B1 $noreg, $noreg, -1, 0
     %y:vr = PseudoVIOTA_M_MF2 $noreg, %x, 1, 3 /* e8 */, 0
 ...
+name: vred_vs2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_vs2
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+...
+---
+name: vred_vs1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_vs1
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 3f966b036589fd..d5d0db25129460 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -38,7 +38,7 @@ name: use_largest_common_vl_imm_imm
 body: |
   bb.0:
     ; CHECK-LABEL: name: use_largest_common_vl_imm_imm
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
@@ -110,4 +110,15 @@ body: |
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, -1, 3 /* e8 */, 0
 ...
-
+---
+name: vred_other_user_is_vl0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_other_user_is_vl0
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+...

>From 56c9f43cb4e6e61135a97d443a8c431e3a9e450d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 18:09:51 -0800
Subject: [PATCH 02/18] fixup! add more incompat tests

---
 .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 75abf775b206a4..fa64e2744e26c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1083,3 +1083,34 @@ body: |
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
 ...
+---
+name: vred_vs1_vs2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_vs1_vs2
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0
+...
+---
+name: vred_vs1_vs2_incompatible_eew
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_vs1_vs2_incompatible_eew
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 4 /* e16 */, 0
+...
+---
+name: vred_vs1_vs2_incomaptible_emul
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_vs1_vs2_incomaptible_emul
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_MF2_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDAND_VS_MF2_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0
+...
+

>From 358428117318fd6add3525f664866f8a446fa37f Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 18:17:35 -0800
Subject: [PATCH 03/18] fixup! do not else after continue/break

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 924a8b0086e029..6f620e1fbdecf3 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -977,10 +977,12 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
         if (!CommonVL) {
           CommonVL = &VLOp;
           continue;
-        } else if (!CommonVL->isIdenticalTo(VLOp)) {
+        }
+        if (!CommonVL->isIdenticalTo(VLOp)) {
           CanReduceVL = false;
           break;
         }
+        continue;
       }
     }
 

>From 3c2a5546b630bbbeddba664fe35730c4151ed382 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 18:21:05 -0800
Subject: [PATCH 04/18] fixup! rerun test checks

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index fa64e2744e26c3..b605a76bcf5b4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1088,7 +1088,7 @@ name: vred_vs1_vs2
 body: |
   bb.0:
     ; CHECK-LABEL: name: vred_vs1_vs2
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0

>From 6aa3d728ff179f7f68c4d6fea436180f10183658 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 18:29:33 -0800
Subject: [PATCH 05/18] fixup! move continue

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 6f620e1fbdecf3..e242830a7af206 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -982,8 +982,8 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
           CanReduceVL = false;
           break;
         }
-        continue;
       }
+      continue;
     }
 
     if (mayReadPastVL(UserMI)) {

>From c67245c82155dc27924ad2f2dc82565d1a6c1f34 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 18:30:34 -0800
Subject: [PATCH 06/18] fixup! VL operand is never x0

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e242830a7af206..f2110b0e424365 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -972,8 +972,7 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
       const MCInstrDesc &Desc = UserMI.getDesc();
       unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
       const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
-      if ((VLOp.isReg() && VLOp.getReg() != RISCV::X0) ||
-          (VLOp.isImm() && VLOp.getImm() != 0)) {
+      if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0)) {
         if (!CommonVL) {
           CommonVL = &VLOp;
           continue;

>From 69291c003b8d620b639fee6e25c0077075ac71f2 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 18:35:27 -0800
Subject: [PATCH 07/18] fixup! fix typo

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index b605a76bcf5b4e..9e18362a41a51c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1104,10 +1104,10 @@ body: |
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 4 /* e16 */, 0
 ...
 ---
-name: vred_vs1_vs2_incomaptible_emul
+name: vred_vs1_vs2_incompatible_emul
 body: |
   bb.0:
-    ; CHECK-LABEL: name: vred_vs1_vs2_incomaptible_emul
+    ; CHECK-LABEL: name: vred_vs1_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_MF2_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0

>From d1113d220908932f05b1dd607d801e4d1d2beb6c Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Dec 2024 13:45:26 -0800
Subject: [PATCH 08/18] fixup! be less conservative

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    | 18 ++++----
 llvm/test/CodeGen/RISCV/double_reduct.ll      | 30 +++++++-------
 .../rvv/fixed-vectors-reduction-formation.ll  | 41 +++++++------------
 .../CodeGen/RISCV/rvv/fold-binary-reduce.ll   |  6 +--
 4 files changed, 39 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index f2110b0e424365..596ea1b4167fcf 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -947,6 +947,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
+static MachineOperand One = MachineOperand::CreateImm(1);
+
 bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
                                   MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
@@ -961,8 +963,9 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
     // Instructions like reductions may use a vector register as a scalar
     // register. In this case, we should treat it like a scalar register which
     // does not impact the decision on whether to optimize VL. But if there is
-    // another user of MI and it has VL=0, we need to be sure not to reduce the
-    // VL of MI to zero when the VLOp of UserOp is may be non-zero.
+    // another user of MI and it may have VL=0, we need to be sure not to reduce
+    // the VL of MI to zero when the VLOp of UserOp is may be non-zero. The most
+    // we can reduce it to is one.
     if (isVectorOpUsedAsScalarOp(UserOp)) {
       [[maybe_unused]] Register R = UserOp.getReg();
       [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
@@ -973,16 +976,9 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
       unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
       const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
       if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0)) {
-        if (!CommonVL) {
-          CommonVL = &VLOp;
-          continue;
-        }
-        if (!CommonVL->isIdenticalTo(VLOp)) {
-          CanReduceVL = false;
-          break;
-        }
+        CommonVL = &One;
+        continue;
       }
-      continue;
     }
 
     if (mayReadPastVL(UserMI)) {
diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
index cecdd77a079e42..25228b21ef0554 100644
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -25,14 +25,14 @@ define float @fmul_f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v10, v8, 2
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
 ; CHECK-NEXT:    vrgather.vi v10, v8, 1
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    vrgather.vi v10, v9, 1
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vslidedown.vi v8, v9, 2
+; CHECK-NEXT:    vfmul.vv v8, v9, v8
+; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vfmul.vv v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fmul.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
@@ -130,14 +130,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    vslidedown.vi v10, v9, 2
-; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vrgather.vi v10, v8, 1
 ; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    vrgather.vi v10, v9, 1
-; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vmv.x.s a1, v9
+; RV32-NEXT:    vslidedown.vi v8, v9, 2
+; RV32-NEXT:    vmul.vv v8, v9, v8
+; RV32-NEXT:    vrgather.vi v9, v8, 1
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -146,14 +146,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v10, v8, 2
 ; RV64-NEXT:    vmul.vv v8, v8, v10
-; RV64-NEXT:    vslidedown.vi v10, v9, 2
-; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vrgather.vi v10, v8, 1
 ; RV64-NEXT:    vmul.vv v8, v8, v10
-; RV64-NEXT:    vrgather.vi v10, v9, 1
-; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    vmv.x.s a1, v9
+; RV64-NEXT:    vslidedown.vi v8, v9, 2
+; RV64-NEXT:    vmul.vv v8, v9, v8
+; RV64-NEXT:    vrgather.vi v9, v8, 1
+; RV64-NEXT:    vmul.vv v8, v8, v9
+; RV64-NEXT:    vmv.x.s a1, v8
 ; RV64-NEXT:    mulw a0, a0, a1
 ; RV64-NEXT:    ret
   %r1 = call i32 @llvm.vector.reduce.mul.i32.v4i32(<4 x i32> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 4f0f5dd78c94b6..6f52bee591d30d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -437,8 +437,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
 ; RV32-NEXT:    vslidedown.vi v9, v8, 1
 ; RV32-NEXT:    vmv.x.s a1, v9
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32-NEXT:    vmv.x.s a2, v9
+; RV32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32-NEXT:    vmv.x.s a3, v8
 ; RV32-NEXT:    add a1, a1, a2
 ; RV32-NEXT:    add a0, a0, a3
@@ -452,8 +452,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
 ; RV64-NEXT:    vslidedown.vi v9, v8, 1
 ; RV64-NEXT:    vmv.x.s a1, v9
 ; RV64-NEXT:    vslidedown.vi v9, v8, 2
-; RV64-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-NEXT:    vmv.x.s a2, v9
+; RV64-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-NEXT:    vmv.x.s a3, v8
 ; RV64-NEXT:    add a1, a1, a2
 ; RV64-NEXT:    add a0, a0, a3
@@ -799,11 +799,8 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
 define float @reduce_fadd_2xf32_non_associative(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_2xf32_non_associative:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
+; CHECK-NEXT:    flw fa4, 4(a0)
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %p, align 256
@@ -835,11 +832,8 @@ define float @reduce_fadd_2xf32_reassoc_only(ptr %p) {
 define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_2xf32_ninf_only:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
+; CHECK-NEXT:    flw fa4, 4(a0)
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %p, align 256
@@ -854,15 +848,13 @@ define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
 define float @reduce_fadd_4xi32_non_associative(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_4xi32_non_associative:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    flw fa5, 12(a0)
 ; CHECK-NEXT:    lui a0, 524288
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9
-; CHECK-NEXT:    vslidedown.vi v8, v8, 3
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fadd.s fa0, fa4, fa5
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %p, align 256
@@ -881,15 +873,10 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
 define float @reduce_fadd_4xi32_non_associative2(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_4xi32_non_associative2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v9, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa4, v9
-; CHECK-NEXT:    vslidedown.vi v9, v8, 2
-; CHECK-NEXT:    vslidedown.vi v8, v8, 3
-; CHECK-NEXT:    vfmv.f.s fa3, v9
-; CHECK-NEXT:    vfmv.f.s fa2, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
+; CHECK-NEXT:    flw fa4, 4(a0)
+; CHECK-NEXT:    flw fa3, 8(a0)
+; CHECK-NEXT:    flw fa2, 12(a0)
 ; CHECK-NEXT:    fadd.s fa5, fa5, fa4
 ; CHECK-NEXT:    fadd.s fa4, fa3, fa2
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
index 2fda344690bfc6..5bc1ab9820d6cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
@@ -282,10 +282,10 @@ define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v10
-; CHECK-NEXT:    vfmv.s.f v10, fa1
-; CHECK-NEXT:    vfredusum.vs v9, v9, v10
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfmv.s.f v8, fa1
+; CHECK-NEXT:    vfredusum.vs v8, v9, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
 entry:

>From 60cd462c4044c204507e755bce16d07bbe6880bb Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Dec 2024 13:57:50 -0800
Subject: [PATCH 09/18] fixup! fix tests after rebase:

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index d5d0db25129460..121dd32afde11a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -38,7 +38,7 @@ name: use_largest_common_vl_imm_imm
 body: |
   bb.0:
     ; CHECK-LABEL: name: use_largest_common_vl_imm_imm
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
@@ -115,7 +115,7 @@ name: vred_other_user_is_vl0
 body: |
   bb.0:
     ; CHECK-LABEL: name: vred_other_user_is_vl0
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0

>From 15d5ba53b60656d1e183c038f280b7350f09536d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Dec 2024 13:59:59 -0800
Subject: [PATCH 10/18] fixup! add test case where reduction has VL=0

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 121dd32afde11a..a13043765410ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -122,3 +122,15 @@ body: |
     %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
 ...
+---
+name: vred_both_vl0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_both_vl0
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0
+    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+...

>From 0329514073c119b7c0bc25e1295f0c01eee855e5 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Dec 2024 15:21:43 -0800
Subject: [PATCH 11/18] fixup! add instructions to isVectorOpUsedAsScalarOp

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    |  3 ++
 llvm/test/CodeGen/RISCV/double_reduct.ll      | 48 +++++++++++--------
 .../test/CodeGen/RISCV/intrinsic-cttz-elts.ll |  4 ++
 .../rvv/fixed-vectors-reduction-formation.ll  | 45 ++++++++++-------
 .../CodeGen/RISCV/rvv/fold-binary-reduce.ll   |  8 ++--
 5 files changed, 67 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 596ea1b4167fcf..260033423e471e 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -857,6 +857,9 @@ static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) {
   case RISCV::VFWREDOSUM_VS:
   case RISCV::VFWREDUSUM_VS:
     return MO.getOperandNo() == 3;
+  case RISCV::VMV_X_S:
+  case RISCV::VFMV_F_S:
+    return MO.getOperandNo() == 2;
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
index 25228b21ef0554..b74922761ad7c6 100644
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -25,14 +25,14 @@ define float @fmul_f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v10, v8, 2
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
+; CHECK-NEXT:    vslidedown.vi v10, v9, 2
+; CHECK-NEXT:    vfmul.vv v9, v9, v10
 ; CHECK-NEXT:    vrgather.vi v10, v8, 1
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
+; CHECK-NEXT:    vrgather.vi v10, v9, 1
+; CHECK-NEXT:    vfmul.vv v9, v9, v10
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v8, v9, 2
-; CHECK-NEXT:    vfmul.vv v8, v9, v8
-; CHECK-NEXT:    vrgather.vi v9, v8, 1
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v9
 ; CHECK-NEXT:    fmul.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
@@ -130,14 +130,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    vslidedown.vi v10, v9, 2
+; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vrgather.vi v10, v8, 1
 ; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    vrgather.vi v10, v9, 1
+; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vslidedown.vi v8, v9, 2
-; RV32-NEXT:    vmul.vv v8, v9, v8
-; RV32-NEXT:    vrgather.vi v9, v8, 1
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    vmv.x.s a1, v9
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -146,14 +146,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v10, v8, 2
 ; RV64-NEXT:    vmul.vv v8, v8, v10
+; RV64-NEXT:    vslidedown.vi v10, v9, 2
+; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vrgather.vi v10, v8, 1
 ; RV64-NEXT:    vmul.vv v8, v8, v10
+; RV64-NEXT:    vrgather.vi v10, v9, 1
+; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    vslidedown.vi v8, v9, 2
-; RV64-NEXT:    vmul.vv v8, v9, v8
-; RV64-NEXT:    vrgather.vi v9, v8, 1
-; RV64-NEXT:    vmul.vv v8, v8, v9
-; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    vmv.x.s a1, v9
 ; RV64-NEXT:    mulw a0, a0, a1
 ; RV64-NEXT:    ret
   %r1 = call i32 @llvm.vector.reduce.mul.i32.v4i32(<4 x i32> %a)
@@ -165,8 +165,9 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: and_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vredand.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -179,8 +180,9 @@ define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @or_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: or_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vredor.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -208,8 +210,9 @@ define i32 @xor_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: umin_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vminu.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vredminu.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -222,8 +225,9 @@ define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: umax_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -236,8 +240,9 @@ define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: smin_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmin.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -250,8 +255,9 @@ define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @smax_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: smax_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
index 94b717b42e92b6..f545d92fe9a0f1 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
@@ -12,9 +12,11 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
+; RV32-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV32-NEXT:    vid.v v9
 ; RV32-NEXT:    vrsub.vi v9, v9, 4
 ; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    li a1, 4
@@ -29,9 +31,11 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
+; RV64-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64-NEXT:    vid.v v9
 ; RV64-NEXT:    vrsub.vi v9, v9, 4
 ; RV64-NEXT:    vand.vv v8, v8, v9
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; RV64-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    li a1, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 6f52bee591d30d..bf8baafc4a25db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -437,8 +437,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
 ; RV32-NEXT:    vslidedown.vi v9, v8, 1
 ; RV32-NEXT:    vmv.x.s a1, v9
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    vmv.x.s a2, v9
 ; RV32-NEXT:    vslidedown.vi v8, v8, 3
+; RV32-NEXT:    vmv.x.s a2, v9
 ; RV32-NEXT:    vmv.x.s a3, v8
 ; RV32-NEXT:    add a1, a1, a2
 ; RV32-NEXT:    add a0, a0, a3
@@ -452,8 +452,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
 ; RV64-NEXT:    vslidedown.vi v9, v8, 1
 ; RV64-NEXT:    vmv.x.s a1, v9
 ; RV64-NEXT:    vslidedown.vi v9, v8, 2
-; RV64-NEXT:    vmv.x.s a2, v9
 ; RV64-NEXT:    vslidedown.vi v8, v8, 3
+; RV64-NEXT:    vmv.x.s a2, v9
 ; RV64-NEXT:    vmv.x.s a3, v8
 ; RV64-NEXT:    add a1, a1, a2
 ; RV64-NEXT:    add a0, a0, a3
@@ -530,7 +530,7 @@ define i32 @reduce_and_16xi32_prefix5(ptr %p) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v10, -1
 ; CHECK-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
 ; CHECK-NEXT:    vredand.vs v8, v8, v10
@@ -725,7 +725,7 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v10, -1
 ; RV32-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
 ; RV32-NEXT:    vredminu.vs v8, v8, v10
@@ -799,8 +799,11 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
 define float @reduce_fadd_2xf32_non_associative(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_2xf32_non_associative:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    flw fa5, 0(a0)
-; CHECK-NEXT:    flw fa4, 4(a0)
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %p, align 256
@@ -832,8 +835,11 @@ define float @reduce_fadd_2xf32_reassoc_only(ptr %p) {
 define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_2xf32_ninf_only:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    flw fa5, 0(a0)
-; CHECK-NEXT:    flw fa4, 4(a0)
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %p, align 256
@@ -848,13 +854,15 @@ define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
 define float @reduce_fadd_4xi32_non_associative(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_4xi32_non_associative:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    flw fa5, 12(a0)
 ; CHECK-NEXT:    lui a0, 524288
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vfredusum.vs v9, v8, v9
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v9
 ; CHECK-NEXT:    fadd.s fa0, fa4, fa5
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %p, align 256
@@ -873,10 +881,15 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
 define float @reduce_fadd_4xi32_non_associative2(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_4xi32_non_associative2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    flw fa5, 0(a0)
-; CHECK-NEXT:    flw fa4, 4(a0)
-; CHECK-NEXT:    flw fa3, 8(a0)
-; CHECK-NEXT:    flw fa2, 12(a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1
+; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vslidedown.vi v9, v8, 2
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    vfmv.f.s fa3, v9
+; CHECK-NEXT:    vfmv.f.s fa2, v8
 ; CHECK-NEXT:    fadd.s fa5, fa5, fa4
 ; CHECK-NEXT:    fadd.s fa4, fa3, fa2
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
index 5bc1ab9820d6cc..6787c8c24c87ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
@@ -18,7 +18,7 @@ entry:
 define i64 @reduce_add2(<4 x i64> %v) {
 ; CHECK-LABEL: reduce_add2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v10, 8
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vredsum.vs v8, v8, v10
@@ -282,10 +282,10 @@ define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v10
+; CHECK-NEXT:    vfmv.s.f v10, fa1
+; CHECK-NEXT:    vfredusum.vs v9, v9, v10
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.s.f v8, fa1
-; CHECK-NEXT:    vfredusum.vs v8, v9, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v9
 ; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
 entry:

>From 90b42ce4886ca68ca2208073ef0f0f4bceecef59 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Dec 2024 16:39:12 -0800
Subject: [PATCH 12/18] fixup! fix operand number and add special case when no
 vlop

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp           | 10 +++++++++-
 llvm/test/CodeGen/RISCV/double_reduct.ll             |  6 ++++++
 .../RISCV/rvv/fixed-vectors-reduction-int-vp.ll      |  8 ++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll | 12 +++++++++++-
 .../test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll |  4 ++--
 5 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 260033423e471e..6638e890e3fd10 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -859,7 +859,7 @@ static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) {
     return MO.getOperandNo() == 3;
   case RISCV::VMV_X_S:
   case RISCV::VFMV_F_S:
-    return MO.getOperandNo() == 2;
+    return MO.getOperandNo() == 1;
   default:
     return false;
   }
@@ -976,6 +976,14 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
              "Expect LMUL 1 register class for vector as scalar operands!");
       LLVM_DEBUG(dbgs() << "    Used this operand as a scalar operand\n");
       const MCInstrDesc &Desc = UserMI.getDesc();
+      // VMV_X_S and VFMV_F_S do not have a VL opt which would cause an assert
+      // assert failure if we called getVLOpNum. Therefore, we will set the
+      // CommonVL in that case as 1, even if it could have been set to 0.
+      if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
+        CommonVL = &One;
+        continue;
+      }
+
       unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
       const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
       if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0)) {
diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
index b74922761ad7c6..691d17272e84f5 100644
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -133,8 +133,11 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV32-NEXT:    vslidedown.vi v10, v9, 2
 ; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vrgather.vi v10, v8, 1
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vrgather.vi v10, v9, 1
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    vmv.x.s a1, v9
@@ -149,8 +152,11 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV64-NEXT:    vslidedown.vi v10, v9, 2
 ; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vrgather.vi v10, v8, 1
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT:    vmul.vv v8, v8, v10
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vrgather.vi v10, v9, 1
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    vmv.x.s a1, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index f920e39e7d295c..6a82ff0c9479ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -1456,6 +1456,7 @@ define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i3
 ; RV32-NEXT:    vmv.v.i v9, 1
 ; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV32-NEXT:    vrgather.vi v9, v8, 1
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    mv a1, a2
@@ -1483,6 +1484,7 @@ define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i3
 ; RV64-NEXT:    vmv.v.i v9, 1
 ; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV64-NEXT:    vrgather.vi v9, v8, 1
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    mv a1, a2
@@ -1518,6 +1520,7 @@ define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i3
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vrgather.vi v9, v8, 1
+; RV32-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    mv a1, a2
@@ -1547,6 +1550,7 @@ define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i3
 ; RV64-NEXT:    vslidedown.vi v9, v8, 2
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vrgather.vi v9, v8, 1
+; RV64-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    mv a1, a2
@@ -1584,6 +1588,7 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vrgather.vi v9, v8, 1
+; RV32-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    mv a1, a2
@@ -1615,6 +1620,7 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3
 ; RV64-NEXT:    vslidedown.vi v9, v8, 2
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vrgather.vi v9, v8, 1
+; RV64-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    mv a1, a2
@@ -1654,6 +1660,7 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m,
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vrgather.vi v9, v8, 1
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    mv a1, a2
@@ -1687,6 +1694,7 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m,
 ; RV64-NEXT:    vslidedown.vi v9, v8, 2
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vrgather.vi v9, v8, 1
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    mv a1, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 2ea618bf8a2260..98a586a5c41a51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -5935,6 +5935,7 @@ define i8 @vreduce_mul_v2i8(ptr %x) {
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    lbu a0, 1(a0)
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmul.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -5977,6 +5978,7 @@ define i8 @vreduce_mul_v4i8(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 2
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -5997,6 +5999,7 @@ define i8 @vreduce_mul_v8i8(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 2
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6019,6 +6022,7 @@ define i8 @vreduce_mul_v16i8(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 2
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6167,6 +6171,7 @@ define i16 @vreduce_mul_v2i16(ptr %x) {
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lh a0, 2(a0)
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmul.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6185,6 +6190,7 @@ define i16 @vreduce_mul_v4i16(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 2
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6205,6 +6211,7 @@ define i16 @vreduce_mul_v8i16(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 2
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6341,6 +6348,7 @@ define i32 @vreduce_mul_v2i32(ptr %x) {
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lw a0, 4(a0)
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmul.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6359,6 +6367,7 @@ define i32 @vreduce_mul_v4i32(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vi v9, v8, 2
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmul.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -6496,9 +6505,9 @@ define i64 @vreduce_mul_v2i64(ptr %x) {
 ; RV32-NEXT:    addi a0, a0, 8
 ; RV32-NEXT:    vlse64.v v9, (a0), zero
 ; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    ret
@@ -6508,6 +6517,7 @@ define i64 @vreduce_mul_v2i64(ptr %x) {
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    ld a0, 8(a0)
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
index 5e657a93ec0d63..fb15085cd3b5a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -21,7 +21,7 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
 ; RV32-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
 ; RV32-NEXT:    vmv1r.v v9, v8
 ; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV32-NEXT:    vmseq.vi v9, v9, 0
 ; RV32-NEXT:    vmv.x.s a0, v9
 ; RV32-NEXT:    andi a3, a0, 255
@@ -48,7 +48,7 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
 ; RV64-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
 ; RV64-NEXT:    vmv1r.v v9, v8
 ; RV64-NEXT:    vmv.s.x v9, a0
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64-NEXT:    vmseq.vi v9, v9, 0
 ; RV64-NEXT:    vmv.x.s a0, v9
 ; RV64-NEXT:    andi a3, a0, 255

>From 7f7e78191731a9714e01ecb54a6b623b528f43e9 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Dec 2024 10:37:59 -0800
Subject: [PATCH 13/18] fixup! update test

---
 .../RISCV/rvv/fixed-vectors-unaligned.ll      | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 6d9f69f436fc41..da3a323c03f93b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -481,7 +481,7 @@ declare <2 x i32> @llvm.masked.load.v2i32(ptr, i32, <2 x i1>, <2 x i32>)
 define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwind {
 ; RV32-SLOW-LABEL: masked_load_v2i32_align1:
 ; RV32-SLOW:       # %bb.0:
-; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vmseq.vi v8, v8, 0
 ; RV32-SLOW-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-SLOW-NEXT:    vmv.x.s a2, v8
@@ -499,7 +499,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-SLOW-NEXT:    slli a6, a6, 24
 ; RV32-SLOW-NEXT:    or a4, a6, a5
 ; RV32-SLOW-NEXT:    or a3, a4, a3
-; RV32-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vmv.v.x v8, a3
 ; RV32-SLOW-NEXT:  .LBB8_2: # %else
 ; RV32-SLOW-NEXT:    andi a2, a2, 2
@@ -515,17 +515,19 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-SLOW-NEXT:    slli a0, a0, 24
 ; RV32-SLOW-NEXT:    or a0, a0, a4
 ; RV32-SLOW-NEXT:    or a0, a0, a2
-; RV32-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vmv.s.x v9, a0
 ; RV32-SLOW-NEXT:    vslideup.vi v8, v9, 1
-; RV32-SLOW-NEXT:  .LBB8_4: # %else2
-; RV32-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vse32.v v8, (a1)
+; RV32-SLOW-NEXT:    ret
+; RV32-SLOW-NEXT:  .LBB8_4:
+; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vse32.v v8, (a1)
 ; RV32-SLOW-NEXT:    ret
 ;
 ; RV64-SLOW-LABEL: masked_load_v2i32_align1:
 ; RV64-SLOW:       # %bb.0:
-; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-SLOW-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-SLOW-NEXT:    vmv.x.s a2, v8
@@ -543,7 +545,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-SLOW-NEXT:    slli a6, a6, 24
 ; RV64-SLOW-NEXT:    or a4, a6, a5
 ; RV64-SLOW-NEXT:    or a3, a4, a3
-; RV64-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vmv.v.x v8, a3
 ; RV64-SLOW-NEXT:  .LBB8_2: # %else
 ; RV64-SLOW-NEXT:    andi a2, a2, 2
@@ -559,11 +561,13 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-SLOW-NEXT:    slli a0, a0, 24
 ; RV64-SLOW-NEXT:    or a0, a0, a4
 ; RV64-SLOW-NEXT:    or a0, a0, a2
-; RV64-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vmv.s.x v9, a0
 ; RV64-SLOW-NEXT:    vslideup.vi v8, v9, 1
-; RV64-SLOW-NEXT:  .LBB8_4: # %else2
-; RV64-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vse32.v v8, (a1)
+; RV64-SLOW-NEXT:    ret
+; RV64-SLOW-NEXT:  .LBB8_4:
+; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vse32.v v8, (a1)
 ; RV64-SLOW-NEXT:    ret
 ;
@@ -585,7 +589,7 @@ declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
 define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nounwind {
 ; SLOW-LABEL: masked_store_v2i32_align2:
 ; SLOW:       # %bb.0:
-; SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; SLOW-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; SLOW-NEXT:    vmseq.vi v9, v9, 0
 ; SLOW-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; SLOW-NEXT:    vmv.x.s a1, v9

>From 36f3bbd9dcfcd0a9ff55bb30eabb799fc947befb Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 31 Dec 2024 06:03:38 -0800
Subject: [PATCH 14/18] fixup! improve how we assign CommonVL

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 25 ++++++++++++----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 6638e890e3fd10..d1923998810af4 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -50,7 +50,9 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
-  bool checkUsers(const MachineOperand *&CommonVL, MachineInstr &MI);
+  /// Returns the largest common VL MachineOperand that may be used to optimize
+  /// MI. Returns std::nullopt if it failed to find a suitable VL.
+  std::optional<const MachineOperand> checkUsers(MachineInstr &MI);
   bool tryReduceVL(MachineInstr &MI);
   bool isCandidate(const MachineInstr &MI) const;
 };
@@ -950,15 +952,15 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
-static MachineOperand One = MachineOperand::CreateImm(1);
-
-bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
-                                  MachineInstr &MI) {
+std::optional<const MachineOperand>
+RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
   // worklist, combined with an extra visit from the outer loop. Restructure
   // along lines of an instcombine style worklist which integrates the outer
   // pass.
   bool CanReduceVL = true;
+  const MachineOperand *CommonVL = nullptr;
+  const MachineOperand One = MachineOperand::CreateImm(1);
   for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
     const MachineInstr &UserMI = *UserOp.getParent();
     LLVM_DEBUG(dbgs() << "  Checking user: " << UserMI << "\n");
@@ -1044,7 +1046,9 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
       break;
     }
   }
-  return CanReduceVL;
+  return CanReduceVL && CommonVL
+             ? std::make_optional<const MachineOperand>(*CommonVL)
+             : std::nullopt;
 }
 
 bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
@@ -1056,12 +1060,11 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
     MachineInstr &MI = *Worklist.pop_back_val();
     LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
 
-    const MachineOperand *CommonVL = nullptr;
-    bool CanReduceVL = true;
-    if (isVectorRegClass(MI.getOperand(0).getReg(), MRI))
-      CanReduceVL = checkUsers(CommonVL, MI);
+    if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI))
+      continue;
 
-    if (!CanReduceVL || !CommonVL)
+    auto CommonVL = checkUsers(MI);
+    if (!CommonVL)
       continue;
 
     assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&

>From c95ace46ce49e1e7bc496322cd8dc9303839b599 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 2 Jan 2025 10:15:00 -0800
Subject: [PATCH 15/18] fixup! refactor to address preames comments

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    | 142 +++++++++++-------
 llvm/test/CodeGen/RISCV/double_reduct.ll      |  18 +--
 .../test/CodeGen/RISCV/intrinsic-cttz-elts.ll |   4 -
 .../RISCV/rvv/fixed-vectors-unaligned.ll      |  26 ++--
 .../RISCV/rvv/fold-scalar-load-crash.ll       |   4 +-
 .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir |  37 ++++-
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir        |  25 +--
 7 files changed, 144 insertions(+), 112 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d1923998810af4..bfad91a89d336c 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -50,6 +50,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
+  std::optional<const MachineOperand> getVLForUser(MachineOperand &UserOp);
   /// Returns the largest common VL MachineOperand that may be used to optimize
   /// MI. Returns std::nullopt if it failed to find a suitable VL.
   std::optional<const MachineOperand> checkUsers(MachineInstr &MI);
@@ -97,6 +98,8 @@ struct OperandInfo {
   OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
       : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {}
 
+  OperandInfo(unsigned Log2EEW) : S(State::Known), Log2EEW(Log2EEW) {}
+
   OperandInfo() : S(State::Unknown) {}
 
   bool isUnknown() const { return S == State::Unknown; }
@@ -109,6 +112,11 @@ struct OperandInfo {
            A.EMUL->second == B.EMUL->second;
   }
 
+  static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
+    assert(A.isKnown() && B.isKnown() && "Both operands must be known");
+    return A.Log2EEW == B.Log2EEW;
+  }
+
   void print(raw_ostream &OS) const {
     if (isUnknown()) {
       OS << "Unknown";
@@ -625,8 +633,8 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
 
   // Vector Reduction Operations
   // Vector Single-Width Integer Reduction Instructions
-  // The Dest and VS1 only read element 0 of the vector register. Return unknown
-  // for these. VS2 has EEW=SEW and EMUL=LMUL.
+  // The Dest and VS1 only read element 0 of the vector register. Return just
+  // the EEW for these. VS2 has EEW=SEW and EMUL=LMUL.
   case RISCV::VREDAND_VS:
   case RISCV::VREDMAX_VS:
   case RISCV::VREDMAXU_VS:
@@ -637,7 +645,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
   case RISCV::VREDXOR_VS: {
     if (MO.getOperandNo() == 2)
       return OperandInfo(MIVLMul, MILog2SEW);
-    return {};
+    return OperandInfo(MILog2SEW);
   }
 
   default:
@@ -952,6 +960,53 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
+std::optional<const MachineOperand>
+RISCVVLOptimizer::getVLForUser(MachineOperand &UserOp) {
+  const MachineInstr &UserMI = *UserOp.getParent();
+  const MCInstrDesc &Desc = UserMI.getDesc();
+
+  // Instructions like reductions may use a vector register as a scalar
+  // register. In this case, we should treat it like a scalar register which
+  // does not impact the decision on whether to optimize VL. But if there is
+  // another user of MI and it may have VL=0, we need to be sure not to reduce
+  // the VL of MI to zero when the VLOp of UserOp is may be non-zero. The most
+  // we can reduce it to is one.
+  if (isVectorOpUsedAsScalarOp(UserOp)) {
+    [[maybe_unused]] Register R = UserOp.getReg();
+    [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
+    assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
+           "Expect LMUL 1 register class for vector as scalar operands!");
+    LLVM_DEBUG(dbgs() << "    Used this operand as a scalar operand\n");
+    // VMV_X_S and VFMV_F_S do not have a VL opt which would cause an assert
+    // assert failure if we called getVLOpNum. Therefore, we will set the
+    // CommonVL in that case as 1, even if it could have been set to 0.
+    if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
+      return MachineOperand::CreateImm(1);
+
+    unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+    const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+    if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0))
+      return MachineOperand::CreateImm(1);
+    LLVM_DEBUG(dbgs() << "    Abort because could not determine VL of vector "
+                         "operand used as scalar operand\n");
+
+    return std::nullopt;
+  }
+
+  if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
+    LLVM_DEBUG(dbgs() << "    Abort due to lack of VL, assume that"
+                         " use VLMAX\n");
+    return std::nullopt;
+  }
+
+  unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+  const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+  // Looking for an immediate or a register VL that isn't X0.
+  assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
+         "Did not expect X0 VL");
+  return VLOp;
+}
+
 std::optional<const MachineOperand>
 RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
@@ -959,41 +1014,10 @@ RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
   // along lines of an instcombine style worklist which integrates the outer
   // pass.
   bool CanReduceVL = true;
-  const MachineOperand *CommonVL = nullptr;
-  const MachineOperand One = MachineOperand::CreateImm(1);
+  std::optional<const MachineOperand> CommonVL;
   for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
     const MachineInstr &UserMI = *UserOp.getParent();
     LLVM_DEBUG(dbgs() << "  Checking user: " << UserMI << "\n");
-
-    // Instructions like reductions may use a vector register as a scalar
-    // register. In this case, we should treat it like a scalar register which
-    // does not impact the decision on whether to optimize VL. But if there is
-    // another user of MI and it may have VL=0, we need to be sure not to reduce
-    // the VL of MI to zero when the VLOp of UserOp is may be non-zero. The most
-    // we can reduce it to is one.
-    if (isVectorOpUsedAsScalarOp(UserOp)) {
-      [[maybe_unused]] Register R = UserOp.getReg();
-      [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
-      assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
-             "Expect LMUL 1 register class for vector as scalar operands!");
-      LLVM_DEBUG(dbgs() << "    Used this operand as a scalar operand\n");
-      const MCInstrDesc &Desc = UserMI.getDesc();
-      // VMV_X_S and VFMV_F_S do not have a VL opt which would cause an assert
-      // assert failure if we called getVLOpNum. Therefore, we will set the
-      // CommonVL in that case as 1, even if it could have been set to 0.
-      if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
-        CommonVL = &One;
-        continue;
-      }
-
-      unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
-      const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
-      if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0)) {
-        CommonVL = &One;
-        continue;
-      }
-    }
-
     if (mayReadPastVL(UserMI)) {
       LLVM_DEBUG(dbgs() << "    Abort because used by unsafe instruction\n");
       CanReduceVL = false;
@@ -1007,45 +1031,55 @@ RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
       break;
     }
 
-    const MCInstrDesc &Desc = UserMI.getDesc();
-    if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
-      LLVM_DEBUG(dbgs() << "    Abort due to lack of VL or SEW, assume that"
-                           " use VLMAX\n");
+    auto VLOp = getVLForUser(UserOp);
+    if (!VLOp) {
       CanReduceVL = false;
       break;
     }
 
-    unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
-    const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
-
-    // Looking for an immediate or a register VL that isn't X0.
-    assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
-           "Did not expect X0 VL");
-
     // Use the largest VL among all the users. If we cannot determine this
     // statically, then we cannot optimize the VL.
-    if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, VLOp)) {
-      CommonVL = &VLOp;
+    if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {
+      CommonVL.emplace(*VLOp);
       LLVM_DEBUG(dbgs() << "    User VL is: " << VLOp << "\n");
-    } else if (!RISCV::isVLKnownLE(VLOp, *CommonVL)) {
+    } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
       LLVM_DEBUG(dbgs() << "    Abort because cannot determine a common VL\n");
       CanReduceVL = false;
       break;
     }
 
-    // The SEW and LMUL of destination and source registers need to match.
+    if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
+      LLVM_DEBUG(dbgs() << "    Abort due to lack of SEW operand\n");
+      CanReduceVL = false;
+      break;
+    }
+
     OperandInfo ConsumerInfo = getOperandInfo(UserOp, MRI);
     OperandInfo ProducerInfo = getOperandInfo(MI.getOperand(0), MRI);
-    if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() ||
-        !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) {
-      LLVM_DEBUG(dbgs() << "    Abort due to incompatible or unknown "
-                           "information for EMUL or EEW.\n");
+    if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown()) {
+      LLVM_DEBUG(dbgs() << "    Abort due to unknown operand information.\n");
+      LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
+      LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
+      CanReduceVL = false;
+      break;
+    }
+
+    // If the operand is used as a scalar operand, then the EEW must be
+    // compatible. Otherwise, the EMUL *and* EEW must be compatible.
+    if ((isVectorOpUsedAsScalarOp(UserOp) &&
+         !OperandInfo::EEWAreEqual(ConsumerInfo, ProducerInfo)) ||
+        (!isVectorOpUsedAsScalarOp(UserOp) &&
+         !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo))) {
+      LLVM_DEBUG(
+          dbgs()
+          << "    Abort due to incompatible information for EMUL or EEW.\n");
       LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
       LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
       CanReduceVL = false;
       break;
     }
   }
+
   return CanReduceVL && CommonVL
              ? std::make_optional<const MachineOperand>(*CommonVL)
              : std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
index 691d17272e84f5..e279fe6627f275 100644
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -171,9 +171,8 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: and_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vand.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vand.vv v8, v8, v9
 ; CHECK-NEXT:    vredand.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -186,9 +185,8 @@ define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @or_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: or_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    vredor.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -216,9 +214,8 @@ define i32 @xor_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: umin_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vminu.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vminu.vv v8, v8, v9
 ; CHECK-NEXT:    vredminu.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -231,9 +228,8 @@ define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: umax_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -246,9 +242,8 @@ define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: smin_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vmin.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmin.vv v8, v8, v9
 ; CHECK-NEXT:    vredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -261,9 +256,8 @@ define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
 define i32 @smax_i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: smax_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
index f545d92fe9a0f1..94b717b42e92b6 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
@@ -12,11 +12,9 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV32-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV32-NEXT:    vid.v v9
 ; RV32-NEXT:    vrsub.vi v9, v9, 4
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    li a1, 4
@@ -31,11 +29,9 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV64-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64-NEXT:    vid.v v9
 ; RV64-NEXT:    vrsub.vi v9, v9, 4
 ; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; RV64-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    li a1, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index da3a323c03f93b..6d9f69f436fc41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -481,7 +481,7 @@ declare <2 x i32> @llvm.masked.load.v2i32(ptr, i32, <2 x i1>, <2 x i32>)
 define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwind {
 ; RV32-SLOW-LABEL: masked_load_v2i32_align1:
 ; RV32-SLOW:       # %bb.0:
-; RV32-SLOW-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vmseq.vi v8, v8, 0
 ; RV32-SLOW-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-SLOW-NEXT:    vmv.x.s a2, v8
@@ -499,7 +499,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-SLOW-NEXT:    slli a6, a6, 24
 ; RV32-SLOW-NEXT:    or a4, a6, a5
 ; RV32-SLOW-NEXT:    or a3, a4, a3
-; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vmv.v.x v8, a3
 ; RV32-SLOW-NEXT:  .LBB8_2: # %else
 ; RV32-SLOW-NEXT:    andi a2, a2, 2
@@ -515,19 +515,17 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-SLOW-NEXT:    slli a0, a0, 24
 ; RV32-SLOW-NEXT:    or a0, a0, a4
 ; RV32-SLOW-NEXT:    or a0, a0, a2
-; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vmv.s.x v9, a0
 ; RV32-SLOW-NEXT:    vslideup.vi v8, v9, 1
-; RV32-SLOW-NEXT:    vse32.v v8, (a1)
-; RV32-SLOW-NEXT:    ret
-; RV32-SLOW-NEXT:  .LBB8_4:
-; RV32-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT:  .LBB8_4: # %else2
+; RV32-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT:    vse32.v v8, (a1)
 ; RV32-SLOW-NEXT:    ret
 ;
 ; RV64-SLOW-LABEL: masked_load_v2i32_align1:
 ; RV64-SLOW:       # %bb.0:
-; RV64-SLOW-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-SLOW-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-SLOW-NEXT:    vmv.x.s a2, v8
@@ -545,7 +543,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-SLOW-NEXT:    slli a6, a6, 24
 ; RV64-SLOW-NEXT:    or a4, a6, a5
 ; RV64-SLOW-NEXT:    or a3, a4, a3
-; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vmv.v.x v8, a3
 ; RV64-SLOW-NEXT:  .LBB8_2: # %else
 ; RV64-SLOW-NEXT:    andi a2, a2, 2
@@ -561,13 +559,11 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-SLOW-NEXT:    slli a0, a0, 24
 ; RV64-SLOW-NEXT:    or a0, a0, a4
 ; RV64-SLOW-NEXT:    or a0, a0, a2
-; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vmv.s.x v9, a0
 ; RV64-SLOW-NEXT:    vslideup.vi v8, v9, 1
-; RV64-SLOW-NEXT:    vse32.v v8, (a1)
-; RV64-SLOW-NEXT:    ret
-; RV64-SLOW-NEXT:  .LBB8_4:
-; RV64-SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT:  .LBB8_4: # %else2
+; RV64-SLOW-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV64-SLOW-NEXT:    vse32.v v8, (a1)
 ; RV64-SLOW-NEXT:    ret
 ;
@@ -589,7 +585,7 @@ declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
 define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nounwind {
 ; SLOW-LABEL: masked_store_v2i32_align2:
 ; SLOW:       # %bb.0:
-; SLOW-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; SLOW-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; SLOW-NEXT:    vmseq.vi v9, v9, 0
 ; SLOW-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; SLOW-NEXT:    vmv.x.s a1, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
index fb15085cd3b5a6..5e657a93ec0d63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -21,7 +21,7 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
 ; RV32-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
 ; RV32-NEXT:    vmv1r.v v9, v8
 ; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    vmseq.vi v9, v9, 0
 ; RV32-NEXT:    vmv.x.s a0, v9
 ; RV32-NEXT:    andi a3, a0, 255
@@ -48,7 +48,7 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
 ; RV64-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
 ; RV64-NEXT:    vmv1r.v v9, v8
 ; RV64-NEXT:    vmv.s.x v9, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vmseq.vi v9, v9, 0
 ; RV64-NEXT:    vmv.x.s a0, v9
 ; RV64-NEXT:    andi a3, a0, 255
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 9e18362a41a51c..da5fdc2561bc3e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1113,4 +1113,39 @@ body: |
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_MF2_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0
 ...
-
+---
+name: vred_other_user_is_vl0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_other_user_is_vl0
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+...
+---
+name: vred_both_vl0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_both_vl0
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0
+    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+...
+---
+name: vred_other_user_is_vl2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vred_other_user_is_vl2
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index a13043765410ef..3f966b036589fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -110,27 +110,4 @@ body: |
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, -1, 3 /* e8 */, 0
 ...
----
-name: vred_other_user_is_vl0
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: vred_other_user_is_vl0
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
-    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
-    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
-    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
-    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
-    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
-...
----
-name: vred_both_vl0
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: vred_both_vl0
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
-    ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0 /* tu, mu */
-    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
-    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
-    %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0
-    %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
-...
+

>From a4409f6761c59cadb8e454220ae6b11479c580ed Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 2 Jan 2025 17:18:58 -0800
Subject: [PATCH 16/18] fixup! fix comment

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index bfad91a89d336c..7d4c492dae1897 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -977,9 +977,9 @@ RISCVVLOptimizer::getVLForUser(MachineOperand &UserOp) {
     assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
            "Expect LMUL 1 register class for vector as scalar operands!");
     LLVM_DEBUG(dbgs() << "    Used this operand as a scalar operand\n");
-    // VMV_X_S and VFMV_F_S do not have a VL opt which would cause an assert
-    // assert failure if we called getVLOpNum. Therefore, we will set the
-    // CommonVL in that case as 1, even if it could have been set to 0.
+    // VMV_X_S and VFMV_F_S do not have a VL operand which would cause an assert
+    // failure if we called getVLOpNum. Therefore, we will return 1 in this
+    // case, even if it could have been set to 0.
     if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
       return MachineOperand::CreateImm(1);
 

>From 78e5c179ea6e2ab892c13da3ee99aae431babd0a Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 2 Jan 2025 17:20:31 -0800
Subject: [PATCH 17/18] fixup! fix typo

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 7d4c492dae1897..1f59a508e7da42 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -969,7 +969,7 @@ RISCVVLOptimizer::getVLForUser(MachineOperand &UserOp) {
   // register. In this case, we should treat it like a scalar register which
   // does not impact the decision on whether to optimize VL. But if there is
   // another user of MI and it may have VL=0, we need to be sure not to reduce
-  // the VL of MI to zero when the VLOp of UserOp is may be non-zero. The most
+  // the VL of MI to zero when the VLOp of UserOp may be non-zero. The most
   // we can reduce it to is one.
   if (isVectorOpUsedAsScalarOp(UserOp)) {
     [[maybe_unused]] Register R = UserOp.getReg();

>From e9544e7f244329f10abdd3aca1e2f62561b09a12 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 3 Jan 2025 05:53:28 -0800
Subject: [PATCH 18/18] fixup! respond to review

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 45 ++++++++--------------
 1 file changed, 17 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 1f59a508e7da42..d8e371c9b64889 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -50,10 +50,10 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
-  std::optional<const MachineOperand> getVLForUser(MachineOperand &UserOp);
+  std::optional<MachineOperand> getVLForUser(MachineOperand &UserOp);
   /// Returns the largest common VL MachineOperand that may be used to optimize
   /// MI. Returns std::nullopt if it failed to find a suitable VL.
-  std::optional<const MachineOperand> checkUsers(MachineInstr &MI);
+  std::optional<MachineOperand> checkUsers(MachineInstr &MI);
   bool tryReduceVL(MachineInstr &MI);
   bool isCandidate(const MachineInstr &MI) const;
 };
@@ -960,7 +960,7 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
-std::optional<const MachineOperand>
+std::optional<MachineOperand>
 RISCVVLOptimizer::getVLForUser(MachineOperand &UserOp) {
   const MachineInstr &UserMI = *UserOp.getParent();
   const MCInstrDesc &Desc = UserMI.getDesc();
@@ -1007,35 +1007,29 @@ RISCVVLOptimizer::getVLForUser(MachineOperand &UserOp) {
   return VLOp;
 }
 
-std::optional<const MachineOperand>
-RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
+std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
   // worklist, combined with an extra visit from the outer loop. Restructure
   // along lines of an instcombine style worklist which integrates the outer
   // pass.
-  bool CanReduceVL = true;
-  std::optional<const MachineOperand> CommonVL;
+  std::optional<MachineOperand> CommonVL;
   for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
     const MachineInstr &UserMI = *UserOp.getParent();
     LLVM_DEBUG(dbgs() << "  Checking user: " << UserMI << "\n");
     if (mayReadPastVL(UserMI)) {
       LLVM_DEBUG(dbgs() << "    Abort because used by unsafe instruction\n");
-      CanReduceVL = false;
-      break;
+      return std::nullopt;
     }
 
     // Tied operands might pass through.
     if (UserOp.isTied()) {
       LLVM_DEBUG(dbgs() << "    Abort because user used as tied operand\n");
-      CanReduceVL = false;
-      break;
+      return std::nullopt;
     }
 
     auto VLOp = getVLForUser(UserOp);
-    if (!VLOp) {
-      CanReduceVL = false;
-      break;
-    }
+    if (!VLOp)
+      return std::nullopt;
 
     // Use the largest VL among all the users. If we cannot determine this
     // statically, then we cannot optimize the VL.
@@ -1044,14 +1038,12 @@ RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
       LLVM_DEBUG(dbgs() << "    User VL is: " << VLOp << "\n");
     } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
       LLVM_DEBUG(dbgs() << "    Abort because cannot determine a common VL\n");
-      CanReduceVL = false;
-      break;
+      return std::nullopt;
     }
 
     if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
       LLVM_DEBUG(dbgs() << "    Abort due to lack of SEW operand\n");
-      CanReduceVL = false;
-      break;
+      return std::nullopt;
     }
 
     OperandInfo ConsumerInfo = getOperandInfo(UserOp, MRI);
@@ -1060,29 +1052,26 @@ RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
       LLVM_DEBUG(dbgs() << "    Abort due to unknown operand information.\n");
       LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
       LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
-      CanReduceVL = false;
-      break;
+      return std::nullopt;
     }
 
     // If the operand is used as a scalar operand, then the EEW must be
     // compatible. Otherwise, the EMUL *and* EEW must be compatible.
-    if ((isVectorOpUsedAsScalarOp(UserOp) &&
+    bool IsVectorOpUsedAsScalarOp = isVectorOpUsedAsScalarOp(UserOp);
+    if ((IsVectorOpUsedAsScalarOp &&
          !OperandInfo::EEWAreEqual(ConsumerInfo, ProducerInfo)) ||
-        (!isVectorOpUsedAsScalarOp(UserOp) &&
+        (!IsVectorOpUsedAsScalarOp &&
          !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo))) {
       LLVM_DEBUG(
           dbgs()
           << "    Abort due to incompatible information for EMUL or EEW.\n");
       LLVM_DEBUG(dbgs() << "      ConsumerInfo is: " << ConsumerInfo << "\n");
       LLVM_DEBUG(dbgs() << "      ProducerInfo is: " << ProducerInfo << "\n");
-      CanReduceVL = false;
-      break;
+      return std::nullopt;
     }
   }
 
-  return CanReduceVL && CommonVL
-             ? std::make_optional<const MachineOperand>(*CommonVL)
-             : std::nullopt;
+  return CommonVL;
 }
 
 bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {



More information about the llvm-commits mailing list