[llvm] 188d5c7 - [RISCV] Add a combine to form masked.store from unit strided store

Tue Sep 19 07:46:02 PDT 2023

Author: Philip Reames
Date: 2023-09-19T07:45:35-07:00
New Revision: 188d5c74426500809a36febe615523e111e9c74e

URL: https://github.com/llvm/llvm-project/commit/188d5c74426500809a36febe615523e111e9c74e
DIFF: https://github.com/llvm/llvm-project/commit/188d5c74426500809a36febe615523e111e9c74e.diff

LOG: [RISCV] Add a combine to form masked.store from unit strided store

Add a DAG combine to form a masked.store from a masked_strided_store intrinsic
with stride equal to element size. This is the store analogy to PR #65674.

As seen in the tests, this does pickup a few cases that we'd previously missed
due to selection ordering.  We match strided stores early without going through
the recently added generic mscatter combines, and thus weren't recognizing the
unit strided store.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
    llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
    llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 19e407414627dd4..0ab3b591c7b1a6b 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14467,6 +14467,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                  ISD::UNINDEXED, ISD::NON_EXTLOAD);
       return SDValue();
     }
+    case Intrinsic::riscv_masked_strided_store: {
+      auto *Store = cast<MemIntrinsicSDNode>(N);
+      SDValue Value = N->getOperand(2);
+      SDValue Base = N->getOperand(3);
+      SDValue Stride = N->getOperand(4);
+      SDValue Mask = N->getOperand(5);
+
+      // If the stride is equal to the element size in bytes,  we can use
+      // a masked.store.
+      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
+      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+          StrideC && StrideC->getZExtValue() == ElementSize)
+        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
+                                  DAG.getUNDEF(XLenVT), Mask,
+                                  Store->getMemoryVT(), Store->getMemOperand(),
+                                  ISD::UNINDEXED, false);
+      return SDValue();
+    }
     case Intrinsic::riscv_vcpop:
     case Intrinsic::riscv_vcpop_mask:
     case Intrinsic::riscv_vfirst:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 7309fb9edec0657..60b61e889315cfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -11296,9 +11296,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
 ; CHECK-LABEL: mscatter_unit_stride:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsse16.v v8, (a0), a1
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -11311,9 +11310,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
 ; CHECK-LABEL: mscatter_unit_stride_with_offset:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 10
-; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsse16.v v8, (a0), a1
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
index 9f8f1b224db9e85..df944fada7964b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -114,8 +114,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    li a0, 8
-; RV64-NEXT:    vsse64.v v8, (a1), a0
+; RV64-NEXT:    vs1r.v v8, (a1)
 ; RV64-NEXT:    ret
   %step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
   %gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step

diff  --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
index 06af0fc8971a543..f8a5b0f9d03a6ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
@@ -89,9 +89,8 @@ define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
 ; CHECK-LABEL: strided_store_i8_nostride:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    li a2, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vsse8.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
   ret void