[llvm] 4c8dbe9 - Allow calls with known writes when trying to remove allocas

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 16 11:04:45 PST 2021


Author: Philip Reames
Date: 2021-12-16T11:04:34-08:00
New Revision: 4c8dbe96d748440b1724d86c77efd9c890a48990

URL: https://github.com/llvm/llvm-project/commit/4c8dbe96d748440b1724d86c77efd9c890a48990
DIFF: https://github.com/llvm/llvm-project/commit/4c8dbe96d748440b1724d86c77efd9c890a48990.diff

LOG: Allow calls with known writes when trying to remove allocas

isAllocSiteRemovable tracks whether all uses of an alloca are both non-capturing, and non-reading. If so, we can remove said alloca because nothing can depend on its content or address.

This patch extends this reasoning to allow writes from calls where we can prove the call has no side effect other than writing to said allocation. This is a fairly natural fit for the existing code with one subtle detail - the call can write to multiple locations at once which stores can't.

As a follow up, we can likely sink the intrinsic handling into the generic code by allowing readnone arguments as well. I deliberately left that out to minimize conceptual churn.

Differential Revision: https://reviews.llvm.org/D115829

Added: 
    llvm/test/Transforms/InstCombine/trivial-dse-calls.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1f81624f79e78..a797d5b05deb1 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2560,6 +2560,42 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
   return isAllocLikeFn(V, TLI) && V != AI;
 }
 
+/// Given a call CB which uses an address UsedV, return true if we can prove the
+/// calls only effect is storing to V.
+static bool isRemovableWrite(CallBase &CB, Value *UsedV) {
+  if (!CB.use_empty())
+    // TODO: add recursion if returned attribute is present
+    return false;
+
+  if (!CB.willReturn() || !CB.doesNotThrow() || !CB.onlyAccessesArgMemory() ||
+      CB.isTerminator())
+    return false;
+
+  if (CB.hasOperandBundles())
+    return false;
+
+  for (unsigned i = 0; i < CB.arg_size(); i++) {
+    if (!CB.getArgOperand(i)->getType()->isPointerTy())
+      continue;
+    if (!CB.doesNotCapture(i))
+      // capture would allow the address to be read back in an untracked manner
+      return false;
+    if (UsedV != CB.getArgOperand(i)) {
+      if (!CB.onlyReadsMemory(i))
+        // A write to another memory location keeps the call live, and thus we
+        // must keep the alloca so that the call has somewhere to write to.
+        // TODO: This results in an inprecision when two values derived from the
+        // same alloca are passed as arguments to the same function.
+        return false;
+      continue;
+    }
+    if (!CB.paramHasAttr(i, Attribute::WriteOnly))
+      // a read would hold the address live
+      return false;
+  }
+  return true;
+}
+
 static bool isAllocSiteRemovable(Instruction *AI,
                                  SmallVectorImpl<WeakTrackingVH> &Users,
                                  const TargetLibraryInfo *TLI) {
@@ -2627,6 +2663,11 @@ static bool isAllocSiteRemovable(Instruction *AI,
           }
         }
 
+        if (isRemovableWrite(*cast<CallBase>(I), PI)) {
+          Users.emplace_back(I);
+          continue;
+        }
+
         if (isFreeCall(I, TLI)) {
           Users.emplace_back(I);
           continue;

diff  --git a/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll b/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll
new file mode 100644
index 0000000000000..33ca4772971ca
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll
@@ -0,0 +1,200 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+declare void @unknown()
+declare void @f(i8*)
+declare void @f2(i8*, i8*)
+
+; Basic case for DSEing a trivially dead writing call
+define void @test_dead() {
+; CHECK-LABEL: @test_dead(
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn
+  ret void
+}
+
+; Add in canonical lifetime intrinsics
+define void @test_lifetime() {
+; CHECK-LABEL: @test_lifetime(
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast)
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast)
+  ret void
+}
+
+; Add some unknown calls just to point out that this is use based, not
+; instruction order sensitive
+define void @test_lifetime2() {
+; CHECK-LABEL: @test_lifetime2(
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast)
+  call void @unknown()
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn
+  call void @unknown()
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast)
+  ret void
+}
+
+define i32 @test_neg_read_after() {
+; CHECK-LABEL: @test_neg_read_after(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn
+  %res = load i32, i32* %a
+  ret i32 %res
+}
+
+
+define void @test_neg_infinite_loop() {
+; CHECK-LABEL: @test_neg_infinite_loop(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind
+  ret void
+}
+
+define void @test_neg_throw() {
+; CHECK-LABEL: @test_neg_throw(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly willreturn
+  ret void
+}
+
+define void @test_neg_extra_write() {
+; CHECK-LABEL: @test_neg_extra_write(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  call void @f(i8* writeonly nocapture %bitcast) nounwind willreturn
+  ret void
+}
+
+; In this case, we can't remove a1 because we need to preserve the write to
+; a2, and if we leave the call around, we need memory to pass to the first arg.
+define void @test_neg_unmodeled_write() {
+; CHECK-LABEL: @test_neg_unmodeled_write(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8*
+; CHECK-NEXT:    call void @f2(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull [[BITCAST2]]) #[[ATTR1]]
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %a2 = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  %bitcast2 = bitcast i32* %a2 to i8*
+  call void @f2(i8* nocapture writeonly %bitcast, i8* %bitcast2) argmemonly nounwind willreturn
+  ret void
+}
+
+define i32 @test_neg_captured_by_call() {
+; CHECK-LABEL: @test_neg_captured_by_call(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A2:%.*]] = alloca i8*, align 8
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast i8** [[A2]] to i8*
+; CHECK-NEXT:    call void @f2(i8* nonnull writeonly [[BITCAST]], i8* nonnull [[BITCAST2]]) #[[ATTR1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[A2]] to i32**
+; CHECK-NEXT:    [[A_COPY_CAST1:%.*]] = load i32*, i32** [[TMP1]], align 8
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[A_COPY_CAST1]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = alloca i32, align 4
+  %a2 = alloca i8*, align 4
+  %bitcast = bitcast i32* %a to i8*
+  %bitcast2 = bitcast i8** %a2 to i8*
+  call void @f2(i8* writeonly %bitcast, i8* %bitcast2) argmemonly nounwind willreturn
+  %a_copy_cast = load i8*, i8** %a2
+  %a_copy = bitcast i8* %a_copy_cast to i32*
+  %res = load i32, i32* %a_copy
+  ret i32 %res
+}
+
+define i32 @test_neg_captured_before() {
+; CHECK-LABEL: @test_neg_captured_before(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR1]]
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = alloca i32, align 4
+  %a2 = alloca i8*, align 4
+  %bitcast = bitcast i32* %a to i8*
+  %bitcast2 = bitcast i8** %a2 to i8*
+  store i8* %bitcast, i8** %a2
+  call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn
+  %a_copy_cast = load i8*, i8** %a2
+  %a_copy = bitcast i8* %a_copy_cast to i32*
+  %res = load i32, i32* %a_copy
+  ret i32 %res
+}
+
+; Show that reading from unrelated memory is okay
+define void @test_unreleated_read() {
+; CHECK-LABEL: @test_unreleated_read(
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %a2 = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  %bitcast2 = bitcast i32* %a2 to i8*
+  call void @f2(i8* nocapture writeonly %bitcast, i8* nocapture readonly %bitcast2) argmemonly nounwind willreturn
+  ret void
+}
+
+; But that removing a capture of an unrelated pointer isn't okay.
+define void @test_neg_unreleated_capture() {
+; CHECK-LABEL: @test_neg_unreleated_capture(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8*
+; CHECK-NEXT:    call void @f2(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull readonly [[BITCAST2]]) #[[ATTR1]]
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32, align 4
+  %a2 = alloca i32, align 4
+  %bitcast = bitcast i32* %a to i8*
+  %bitcast2 = bitcast i32* %a2 to i8*
+  call void @f2(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn
+  ret void
+}
+


        


More information about the llvm-commits mailing list