[llvm] [RISCV] Add a combine to form masked.store from unit strided store (PR #66677)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 18 10:49:12 PDT 2023


llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

Changes

Add a DAG combine to form a masked.store from a masked_strided_store intrinsic with a stride equal to the element size. This is the store analogue of PR #65674.

As seen in the tests, this does pick up a few cases that we'd previously missed due to selection ordering: we matched strided stores early, without going through the recently added generic mscatter combines, and thus weren't recognizing the unit-strided store.
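For illustration, a minimal IR sketch of the case the combine targets (mirroring the intrinsic signature used in strided-load-store-intrinsics.ll below; the function name is hypothetical):

```llvm
; A stride of 2 bytes equals the element size of i16, so this masked
; strided store can lower to the unit-strided vse16.v (via masked.store)
; instead of vsse16.v with an explicit stride register.
declare void @llvm.riscv.masked.strided.store.v8i16.p0.i64(<8 x i16>, ptr, i64, <8 x i1>)

define void @unit_stride_example(<8 x i16> %v, ptr %p, <8 x i1> %m) {
  call void @llvm.riscv.masked.strided.store.v8i16.p0.i64(<8 x i16> %v, ptr %p, i64 2, <8 x i1> %m)
  ret void
}
```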

---
Full diff: https://github.com/llvm/llvm-project/pull/66677.diff


4 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+18) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll (+1-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll (+1-2) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de58335b435651c..46482cfcfe25ea5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14368,6 +14368,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                  ISD::UNINDEXED, ISD::NON_EXTLOAD);
       return SDValue();
     }
+    case Intrinsic::riscv_masked_strided_store: {
+      auto *Store = cast<MemIntrinsicSDNode>(N);
+      SDValue Value = N->getOperand(2);
+      SDValue Base = N->getOperand(3);
+      SDValue Stride = N->getOperand(4);
+      SDValue Mask = N->getOperand(5);
+
+      // If the stride is equal to the element size in bytes, we can use
+      // a masked.store.
+      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
+      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+          StrideC && StrideC->getZExtValue() == ElementSize)
+        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
+                                  DAG.getUNDEF(XLenVT), Mask,
+                                  Store->getMemoryVT(), Store->getMemOperand(),
+                                  ISD::UNINDEXED, false);
+      return SDValue();
+    }
     case Intrinsic::riscv_vcpop:
     case Intrinsic::riscv_vcpop_mask:
     case Intrinsic::riscv_vfirst:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 7309fb9edec0657..60b61e889315cfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -11296,9 +11296,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
 ; CHECK-LABEL: mscatter_unit_stride:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsse16.v v8, (a0), a1
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -11311,9 +11310,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
 ; CHECK-LABEL: mscatter_unit_stride_with_offset:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 10
-; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsse16.v v8, (a0), a1
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
index 9f8f1b224db9e85..df944fada7964b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -114,8 +114,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    li a0, 8
-; RV64-NEXT:    vsse64.v v8, (a1), a0
+; RV64-NEXT:    vs1r.v v8, (a1)
 ; RV64-NEXT:    ret
   %step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
   %gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
index 06af0fc8971a543..f8a5b0f9d03a6ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
@@ -89,9 +89,8 @@ define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
 ; CHECK-LABEL: strided_store_i8_nostride:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    li a2, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vsse8.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
   ret void

``````````



https://github.com/llvm/llvm-project/pull/66677

