[llvm] [DSE] Add predicated vector length store support for masked store eli… (PR #134175)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 2 16:25:54 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Michael Berg (mcberg2021)

<details>
<summary>Changes</summary>

…mination

`isMaskedStoreOverwrite` handles pairs of masked stores in which one store fully overwrites the other. This patch adds support for predicated vector-length stores (`llvm.vp.store`) so that DSE also eliminates this variant of masked store.

This is the follow-up installment mentioned in: https://reviews.llvm.org/D132700

---
Full diff: https://github.com/llvm/llvm-project/pull/134175.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+36) 
- (added) llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll (+34) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 935f21fd484f3..22eaeeafcf786 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -269,6 +269,42 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
       return OW_Unknown;
     return OW_Complete;
   }
+  if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
+    // Operands {0        , 1     , 2   , 3 }
+    //          {StoredVal, VecPtr, Mask, VL}
+    // Types.
+    VectorType *KillingTy =
+        cast<VectorType>(KillingII->getArgOperand(0)->getType());
+    VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
+    if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
+      return OW_Unknown;
+    // Element count.
+    if (KillingTy->getElementCount() != DeadTy->getElementCount())
+      return OW_Unknown;
+    // Pointers.
+    Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
+    Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+    if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
+      return OW_Unknown;
+    // Masks.
+    // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+    if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
+      return OW_Unknown;
+    // Lengths.
+    if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+      return OW_Unknown;
+    AAMDNodes KillingAA = KillingII->getAAMetadata();
+    AAMDNodes DeadAA = DeadII->getAAMetadata();
+    // There must be scoped noalias metadata on both stores.
+    if (!KillingAA.Scope || !DeadAA.Scope ||
+        !KillingAA.NoAlias || !DeadAA.NoAlias)
+      return OW_Unknown;
+    // Check that both stores have the same scope and noalias metadata.
+    if (KillingAA.Scope != DeadAA.Scope ||
+        KillingAA.NoAlias != DeadAA.NoAlias)
+      return OW_Unknown;
+    return OW_Complete;
+  }
   return OW_Unknown;
 }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll b/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll
new file mode 100644
index 0000000000000..825acd16a9d2a
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=dse -S < %s | FileCheck %s
+target triple = "riscv64-unknown-linux-gnu"
+
+; Test predicated vector length masked stores for elimination
+
+define void @foo(ptr %a, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2) {
+;
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> [[V1:%.*]], <vscale x 8 x i32> [[V2:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[VP_OP]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL]]), !alias.scope [[META0:![0-9]+]], !noalias [[META5:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !tbaa !16, !alias.scope !34, !noalias !37
+  %vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !alias.scope !34, !noalias !37
+  ret void
+}
+
+declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr nocapture, <vscale x 8 x i1>, i32)
+
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C/C++ TBAA"}
+!13 = !{!"int", !11, i64 0}
+!16 = !{!13, !13, i64 0}
+!28 = distinct !{!28, !"LVerDomain"}
+!30 = distinct !{!30, !"LVerDomain"}
+!34 = !{!35, !36}
+!35 = distinct !{!35, !28}
+!36 = distinct !{!36, !30}
+!37 = !{!38, !39}
+!38 = distinct !{!38, !28}
+!39 = distinct !{!39, !28}

``````````

</details>


https://github.com/llvm/llvm-project/pull/134175


More information about the llvm-commits mailing list