[llvm] [RISCV][RVV] Prohibit conversion of scalar store to single-element vse if vmv.x.s has multiple uses (PR #152112)

Tue Aug 5 03:12:21 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Sergey Kachkov (skachkov-sc)

<details>
<summary>Changes</summary>

Godbolt example: https://godbolt.org/z/ThdfP475a

In the example single-element vse is used to store reduction result instead of scalar store ([this optimization was introduced by this patch](https://reviews.llvm.org/D109482)). However, vmv.x.s can't be eliminated here because it has other uses (e.g. CopyToReg), so it seems more profitable to use scalar store (we already have store value in a scalar register, and can save one vsetvli which is likely to be required for single-element vse). The proposed solution is to this transform only if vmv.x.s has one use (in store instruction)

---
Full diff: https://github.com/llvm/llvm-project/pull/152112.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+8-10) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll (+1-2) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e1deea93b971..59583bd9bf48a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20610,10 +20610,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
     // vfmv.f.s is represented as extract element from 0. Match it late to avoid
     // any illegal types.
-    if (Val.getOpcode() == RISCVISD::VMV_X_S ||
-        (DCI.isAfterLegalizeDAG() &&
-         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-         isNullConstant(Val.getOperand(1)))) {
+    if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
+         (DCI.isAfterLegalizeDAG() &&
+          Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          isNullConstant(Val.getOperand(1)))) &&
+        Val.hasOneUse()) {
       SDValue Src = Val.getOperand(0);
       MVT VecVT = Src.getSimpleValueType();
       // VecVT should be scalable and memory VT should match the element type.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 67d55366674f3..4c35b2506d3e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2047,19 +2047,17 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
 ;
 ; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi a1, a0, 8
 ; RV32ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
 ; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
 ; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
-; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
-; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-NEXT:    vse32.v v9, (a0)
-; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
-; RV32ZVE32F-NEXT:    srai a2, a2, 31
-; RV32ZVE32F-NEXT:    vse32.v v8, (a1)
-; RV32ZVE32F-NEXT:    srai a3, a3, 31
-; RV32ZVE32F-NEXT:    sw a2, 4(a0)
-; RV32ZVE32F-NEXT:    sw a3, 12(a0)
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v9
+; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV32ZVE32F-NEXT:    srai a3, a1, 31
+; RV32ZVE32F-NEXT:    srai a4, a2, 31
+; RV32ZVE32F-NEXT:    sw a1, 0(a0)
+; RV32ZVE32F-NEXT:    sw a3, 4(a0)
+; RV32ZVE32F-NEXT:    sw a2, 8(a0)
+; RV32ZVE32F-NEXT:    sw a4, 12(a0)
 ; RV32ZVE32F-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
index 6787c8c24c87e..143545ccfd4f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
@@ -266,8 +266,7 @@ define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa0
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    fsw fa5, 0(a0)
 ; CHECK-NEXT:    ret
 entry:
   %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)

``````````

</details>


https://github.com/llvm/llvm-project/pull/152112