[llvm] DSE: lift limitation on sizes being non-scalable (PR #110670)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 1 06:29:32 PDT 2024


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/110670

As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination.
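
For reference, the heart of the change is the clamping done in isOverwrite:
scale the killing store's size by the smallest vscale the function admits and
the dead store's size by the largest, then compare as before. A minimal
standalone sketch of that arithmetic (assuming an LLVM development setup; the
vscale range [2, 4] and the byte sizes are made-up example values, not taken
from the patch's tests):

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include <cstdio>

  using namespace llvm;

  int main() {
    // Stand-in for getVScaleRange(&F, 64): pretend vscale is in [2, 4],
    // e.g. from a vscale_range(2,4) attribute. ConstantRange is half-open.
    ConstantRange VScale(APInt(64, 2), APInt(64, 5));

    // Known-minimum byte sizes: <vscale x 4 x i64> killing <vscale x 2 x i64>.
    APInt KillingSize(64, 32);
    APInt DeadSize(64, 16);

    bool Overflow;
    // Lower bound on the killing store: 32 * vscale_min = 64 bytes.
    APInt KillingLow = VScale.getUnsignedMin().umul_ov(KillingSize, Overflow);
    if (!Overflow)
      KillingSize = KillingLow;
    // Upper bound on the dead store: 16 * vscale_max = 64 bytes.
    APInt DeadHigh = VScale.getUnsignedMax().umul_ov(DeadSize, Overflow);
    if (!Overflow)
      DeadSize = DeadHigh;

    // 64 >= 64: the killing store is guaranteed to cover the dead one.
    printf("complete overwrite: %d\n", (int)KillingSize.uge(DeadSize));
    return 0;
  }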

-- 8< --
Based on #110669.

From 45de0d89f85bcc25362ee5ed946840d7e92a4884 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 1 Oct 2024 11:58:47 +0100
Subject: [PATCH 1/2] DSE: pre-commit tests for scalable vectors

As AliasAnalysis now has support for scalable sizes, add tests to
DeadStoreElimination covering the scalable-vector case, in preparation
for extending it.
---
 .../offsetted-overlapping-stores.ll           | 56 ++++++++++++++++++-
 .../stores-of-existing-values.ll              | 52 +++++++++++++++++
 2 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
index fbab350008f4ee..c2f7adde7a22ee 100644
--- a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
@@ -47,6 +47,29 @@ bb:
   ret void
 }
 
+define void @ScalableVectorTestFullyOverlapping(ptr %arg, i32 %i) {
+; CHECK-LABEL: @ScalableVectorTestFullyOverlapping(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I7:%.*]] = add nuw nsw i32 [[I:%.*]], 1
+; CHECK-NEXT:    [[I8:%.*]] = zext i32 [[I7]] to i64
+; CHECK-NEXT:    [[I9:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I8]]
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[I9]], align 4
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I2]]
+; CHECK-NEXT:    store <vscale x 2 x float> zeroinitializer, ptr [[I3]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i7 = add nuw nsw i32 %i, 1
+  %i8 = zext i32 %i7 to i64
+  %i9 = getelementptr inbounds float, ptr %arg, i64 %i8
+  store float 0.0, ptr %i9, align 4
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, ptr %arg, i64 %i2
+  store <vscale x 2 x float> zeroinitializer, ptr %i3, align 16
+  ret void
+}
+
 define void @ArrayTestPartiallyOverlapping(i64 %0) {
 ;
 ; The DSE pass will not kill the store because the overlap is partial
@@ -55,9 +78,9 @@ define void @ArrayTestPartiallyOverlapping(i64 %0) {
 ; CHECK-LABEL: @ArrayTestPartiallyOverlapping(
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 10
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP0]], 15
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP5]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP0]], 15
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    store i32 1, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    store i64 0, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -97,3 +120,30 @@ bb:
   ret void
 }
 
+define void @ScalableVectorTestPartiallyOverlapping(ptr %arg, i32 %i) {
+;
+; The DSE pass will not kill the store because the overlap is partial
+; and won't fully clobber the original store.
+;
+; CHECK-LABEL: @ScalableVectorTestPartiallyOverlapping(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I2]]
+; CHECK-NEXT:    store <vscale x 2 x float> zeroinitializer, ptr [[I3]], align 16
+; CHECK-NEXT:    [[I5:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I6:%.*]] = zext i32 [[I5]] to i64
+; CHECK-NEXT:    [[I7:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I6]]
+; CHECK-NEXT:    store <vscale x 2 x float> zeroinitializer, ptr [[I7]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, ptr %arg, i64 %i2
+  store <vscale x 2 x float> zeroinitializer, ptr %i3, align 16
+  %i5 = add nuw nsw i32 %i, 1
+  %i6 = zext i32 %i5 to i64
+  %i7 = getelementptr inbounds float, ptr %arg, i64 %i6
+  store <vscale x 2 x float> zeroinitializer, ptr %i7, align 16
+  ret void
+}
+
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index c9a0943de8cd98..ae203dfba7ddc1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -655,3 +655,55 @@ exit:
   call void @use(ptr %p) argmemonly
   ret void
 }
+
+define void @scalable_scalable_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @scalable_scalable_redundant_store(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[GEP]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR]], align 32
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %gep
+  store <vscale x 4 x i64> zeroinitializer, ptr %ptr
+  ret void
+}
+
+define void @scalable_fixed_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @scalable_fixed_redundant_store(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
+; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr [[GEP]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR]], align 32
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 2
+  store <2 x i64> zeroinitializer, ptr %gep
+  store <vscale x 4 x i64> zeroinitializer, ptr %ptr
+  ret void
+}
+
+define void @fixed_scalable_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @fixed_scalable_redundant_store(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[GEP]], align 16
+; CHECK-NEXT:    store <128 x i64> zeroinitializer, ptr [[PTR]], align 1024
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %gep
+  store <128 x i64> zeroinitializer, ptr %ptr
+  ret void
+}
+
+define void @scalable_scalable_neg(ptr %ptr) {
+; CHECK-LABEL: @scalable_scalable_neg(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[GEP]], align 32
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 8
+  store <vscale x 4 x i64> zeroinitializer, ptr %gep
+  store <vscale x 2 x i64> zeroinitializer, ptr %ptr
+  ret void
+}

From 809c9d06ea3c798706abc148fad73067467898eb Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 1 Oct 2024 14:16:20 +0100
Subject: [PATCH 2/2] DSE: lift limitation on sizes being non-scalable

As AliasAnalysis now has support for scalable sizes, lift the limitation
on analyzing scalable sizes in DeadStoreElimination.
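
One hunk below also rewrites the offset-based overlap checks to use unsigned
APInt predicates, since the sizes may now carry vscale-scaled values. A rough
standalone rendering of those three checks (offsets and sizes are made-up
example values; this is a sketch, not the actual DSE code):

  #include "llvm/ADT/APInt.h"
  #include <cstdint>
  #include <cstdio>

  using namespace llvm;

  int main() {
    // Made-up example: a dead store of 32 bytes at offset 16 and a killing
    // store of 64 bytes at offset 0 within the same underlying object.
    int64_t KillingOff = 0, DeadOff = 16;
    APInt KillingSize(64, 64), DeadSize(64, 32);

    if (DeadOff >= KillingOff) {
      // Dead access ends "not after" the killing access: complete overwrite.
      if (KillingSize.uge(uint64_t(DeadOff - KillingOff) + DeadSize))
        puts("OW_Complete"); // taken here: 64 >= 16 + 32
      // Dead access starts "before" the killing access's end: overlap.
      else if (KillingSize.ugt(uint64_t(DeadOff - KillingOff)))
        puts("OW_MaybePartial");
    } else if (DeadSize.ugt(uint64_t(KillingOff - DeadOff))) {
      // Killing access starts "before" the dead access's end: overlap.
      puts("OW_MaybePartial");
    }
    return 0;
  }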
---
 .../Scalar/DeadStoreElimination.cpp           | 38 ++++++++++++-------
 .../stores-of-existing-values.ll              | 12 ++----
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a304f7b056f5f7..40251db9731a62 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1016,15 +1016,25 @@ struct DSEState {
       return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
     }
 
-    const TypeSize KillingSize = KillingLocSize.getValue();
-    const TypeSize DeadSize = DeadLoc.Size.getValue();
-    // Bail on doing Size comparison which depends on AA for now
-    // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
-    const bool AnyScalable =
-        DeadSize.isScalable() || KillingLocSize.isScalable();
-
-    if (AnyScalable)
-      return OW_Unknown;
+    APInt KillingSize = APInt(64, KillingLocSize.getValue().getKnownMinValue());
+    APInt DeadSize = APInt(64, DeadLoc.Size.getValue().getKnownMinValue());
+
+    // Conservatively scale KillingSize by the minimum vscale and DeadSize by
+    // the maximum vscale, so that the size comparisons below remain sound.
+    ConstantRange CR = getVScaleRange(&F, 64);
+    if (KillingLocSize.isScalable()) {
+      bool Overflow;
+      APInt LowerRange = CR.getUnsignedMin().umul_ov(KillingSize, Overflow);
+      if (!Overflow)
+        KillingSize = LowerRange;
+    }
+    if (DeadLoc.Size.isScalable()) {
+      bool Overflow;
+      APInt UpperRange = CR.getUnsignedMax().umul_ov(DeadSize, Overflow);
+      if (!Overflow)
+        DeadSize = UpperRange;
+    }
+
     // Query the alias information
     AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
 
@@ -1032,14 +1042,14 @@ struct DSEState {
     // the killing store was larger than the dead store.
     if (AAR == AliasResult::MustAlias) {
       // Make sure that the KillingSize size is >= the DeadSize size.
-      if (KillingSize >= DeadSize)
+      if (KillingSize.uge(DeadSize))
         return OW_Complete;
     }
 
     // If we hit a partial alias we may have a full overwrite
     if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
       int32_t Off = AAR.getOffset();
-      if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
+      if (Off >= 0 && KillingSize.uge(uint64_t(Off) + DeadSize))
         return OW_Complete;
     }
 
@@ -1089,16 +1099,16 @@ struct DSEState {
     if (DeadOff >= KillingOff) {
       // If the dead access ends "not after" the killing access then the
       // dead one is completely overwritten by the killing one.
-      if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
+      if (KillingSize.uge(uint64_t(DeadOff - KillingOff) + DeadSize))
         return OW_Complete;
       // If start of the dead access is "before" end of the killing access
       // then accesses overlap.
-      else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
+      if (KillingSize.ugt(uint64_t(DeadOff - KillingOff)))
         return OW_MaybePartial;
     }
     // If start of the killing access is "before" end of the dead access then
     // accesses overlap.
-    else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
+    else if (DeadSize.ugt(uint64_t(KillingOff - DeadOff))) {
       return OW_MaybePartial;
     }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index ae203dfba7ddc1..e89d6dea164750 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -658,9 +658,7 @@ exit:
 
 define void @scalable_scalable_redundant_store(ptr %ptr) {
 ; CHECK-LABEL: @scalable_scalable_redundant_store(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[GEP]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR]], align 32
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
   %gep = getelementptr i64, ptr %ptr, i64 2
@@ -671,9 +669,7 @@ define void @scalable_scalable_redundant_store(ptr %ptr) {
 
 define void @scalable_fixed_redundant_store(ptr %ptr) {
 ; CHECK-LABEL: @scalable_fixed_redundant_store(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr [[GEP]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR]], align 32
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
   %gep = getelementptr i64, ptr %ptr, i64 2
@@ -684,9 +680,7 @@ define void @scalable_fixed_redundant_store(ptr %ptr) {
 
 define void @fixed_scalable_redundant_store(ptr %ptr) {
 ; CHECK-LABEL: @fixed_scalable_redundant_store(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[GEP]], align 16
-; CHECK-NEXT:    store <128 x i64> zeroinitializer, ptr [[PTR]], align 1024
+; CHECK-NEXT:    store <128 x i64> zeroinitializer, ptr [[PTR:%.*]], align 1024
 ; CHECK-NEXT:    ret void
 ;
   %gep = getelementptr i64, ptr %ptr, i64 2


