[clang] [llvm] [Coroutines] Drop dead instructions more aggressively in addMustTailToCoroResumes() (PR #85271)

via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 20 02:45:08 PDT 2024


https://github.com/zmodem updated https://github.com/llvm/llvm-project/pull/85271

>From 97c0ff6c3b05829eb4f0b0350b2c2d000483fe06 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Thu, 14 Mar 2024 17:09:44 +0100
Subject: [PATCH 1/5] [Coroutines] Drop dead instructions more aggressively in
 addMustTailToCoroResumes()

The old code used isInstructionTriviallyDead() when walking the path
from a resume call to function return to check if the call is in tail
position.

However, since the code was walking forwards it was not able to get past
instructions such as:

  %gep = getelementptr inbounds i64, ptr %alloc.var, i32 0
  %foo = ptrtoint ptr %gep to i64

This patch calls SimplifyInstructionsInBlock() before walking a block to
remove any dead instructions in a more rigorous fashion.
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp            | 6 ++----
 llvm/test/Transforms/Coroutines/coro-split-musttail7.ll | 7 ++++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 0fce596d3e2ca0..5d3106f157edcd 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1203,10 +1203,6 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
       if (isa<BitCastInst>(I) || I->isDebugOrPseudoInst() ||
           I->isLifetimeStartOrEnd())
         I = I->getNextNode();
-      else if (isInstructionTriviallyDead(I))
-        // Duing we are in the middle of the transformation, we need to erase
-        // the dead instruction manually.
-        I = &*I->eraseFromParent();
       else
         break;
     }
@@ -1245,6 +1241,7 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
       }
 
       BasicBlock *Succ = BR->getSuccessor(SuccIndex);
+      SimplifyInstructionsInBlock(Succ);
       scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
       I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());
 
@@ -1288,6 +1285,7 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
         return false;
 
       BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
+      SimplifyInstructionsInBlock(BB);
       scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
       I = GetFirstValidInstruction(BB->getFirstNonPHIOrDbgOrLifetime());
       continue;
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index 2257d5aee473ee..241de5cfb25a7d 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -1,6 +1,6 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
 ; to convert a resuming call to a musttail call.
-; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
+; The difference between this and coro-split-musttail5.ll and coro-split-musttail6.ll
 ; is that this contains dead instruction generated during the transformation,
 ; which makes the optimization harder.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
@@ -27,6 +27,11 @@ await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
   call fastcc void @fakeresume1(ptr align 8 null)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+
+  ; These (non-trivially) dead instructions are in the way.
+  %gep = getelementptr inbounds i64, ptr %alloc.var, i32 0
+  %foo = ptrtoint ptr %gep to i64
+
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
     i8 1, label %exit

>From e268280f2e384619b4c5b948f7ecf23ebe26b359 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Fri, 15 Mar 2024 15:23:12 +0100
Subject: [PATCH 2/5] skip any side-effect free instructions when looking for
 the return

---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 77 ++++++++------------
 1 file changed, 29 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 5d3106f157edcd..df57f9028e380f 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1197,18 +1197,6 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
   assert(InitialInst->getModule());
   const DataLayout &DL = InitialInst->getModule()->getDataLayout();
 
-  auto GetFirstValidInstruction = [](Instruction *I) {
-    while (I) {
-      // BitCastInst wouldn't generate actual code so that we could skip it.
-      if (isa<BitCastInst>(I) || I->isDebugOrPseudoInst() ||
-          I->isLifetimeStartOrEnd())
-        I = I->getNextNode();
-      else
-        break;
-    }
-    return I;
-  };
-
   auto TryResolveConstant = [&ResolvedValues](Value *V) {
     auto It = ResolvedValues.find(V);
     if (It != ResolvedValues.end())
@@ -1217,8 +1205,9 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
   };
 
   Instruction *I = InitialInst;
-  while (I->isTerminator() || isa<CmpInst>(I)) {
+  while (true) {
     if (isa<ReturnInst>(I)) {
+      assert(!cast<ReturnInst>(I)->getReturnValue());
       ReplaceInstWithInst(InitialInst, I->clone());
       return true;
     }
@@ -1241,42 +1230,27 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
       }
 
       BasicBlock *Succ = BR->getSuccessor(SuccIndex);
-      SimplifyInstructionsInBlock(Succ);
       scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
-      I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());
-
+      I = Succ->getFirstNonPHIOrDbgOrLifetime();
       continue;
     }
 
-    if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
+    if (auto *Cmp = dyn_cast<CmpInst>(I)) {
       // If the case number of suspended switch instruction is reduced to
       // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
-      auto *BR = dyn_cast<BranchInst>(
-          GetFirstValidInstruction(CondCmp->getNextNode()));
-      if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
-        return false;
-
-      // And the comparsion looks like : %cond = icmp eq i8 %V, constant.
-      // So we try to resolve constant for the first operand only since the
-      // second operand should be literal constant by design.
-      ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
-      auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
-      if (!Cond0 || !Cond1)
-        return false;
-
-      // Both operands of the CmpInst are Constant. So that we could evaluate
-      // it immediately to get the destination.
-      auto *ConstResult =
-          dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
-              CondCmp->getPredicate(), Cond0, Cond1, DL));
-      if (!ConstResult)
-        return false;
-
-      ResolvedValues[BR->getCondition()] = ConstResult;
-
-      // Handle this branch in next iteration.
-      I = BR;
-      continue;
+      // Try to constant fold it.
+      ConstantInt *Cond0 = TryResolveConstant(Cmp->getOperand(0));
+      ConstantInt *Cond1 = TryResolveConstant(Cmp->getOperand(1));
+      if (Cond0 && Cond1) {
+        ConstantInt *Result =
+            dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
+                Cmp->getPredicate(), Cond0, Cond1, DL));
+        if (Result) {
+          ResolvedValues[Cmp] = Result;
+          I = I->getNextNode();
+          continue;
+        }
+      }
     }
 
     if (auto *SI = dyn_cast<SwitchInst>(I)) {
@@ -1284,14 +1258,21 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
       if (!Cond)
         return false;
 
-      BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
-      SimplifyInstructionsInBlock(BB);
-      scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-      I = GetFirstValidInstruction(BB->getFirstNonPHIOrDbgOrLifetime());
+      BasicBlock *Succ = SI->findCaseValue(Cond)->getCaseSuccessor();
+      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
+      I = Succ->getFirstNonPHIOrDbgOrLifetime();
+      continue;
+    }
+
+    if (I->isDebugOrPseudoInst() || I->isLifetimeStartOrEnd() ||
+        !I->mayHaveSideEffects()) {
+      // We can skip instructions without side effects. If their values are
+      // needed, we'll notice later, e.g. when hitting a conditional branch.
+      I = I->getNextNode();
       continue;
     }
 
-    return false;
+    break;
   }
 
   return false;

>From bbf499cfa6e8a95961c3b63aa0bd3639c46d5e0f Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 20 Mar 2024 10:16:21 +0100
Subject: [PATCH 3/5] return %foo in the lit test

---
 llvm/test/Transforms/Coroutines/coro-split-musttail7.ll | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index 241de5cfb25a7d..d0d5005587bda6 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -8,7 +8,7 @@
 
 declare void @fakeresume1(ptr align 8)
 
-define void @g() #0 {
+define i64 @g() #0 {
 entry:
   %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
   %alloc = call ptr @malloc(i64 16) #3
@@ -41,8 +41,9 @@ await.ready:
   call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
   br label %exit
 exit:
+  %result = phi i64 [0, %entry], [0, %entry], [%foo, %await.suspend], [%foo, %await.suspend], [%foo, %await.ready]
   call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
+  ret i64 %result
 }
 
 ; Verify that in the resume part resume call is marked with musttail.

>From 98f917b626181038123f430a810f6c48c2a50f58 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 20 Mar 2024 10:27:04 +0100
Subject: [PATCH 4/5] add clang codegen test

---
 .../coro-symmetric-transfer-04.cpp            | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 clang/test/CodeGenCoroutines/coro-symmetric-transfer-04.cpp

diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-04.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-04.cpp
new file mode 100644
index 00000000000000..de7755d8ccc960
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-04.cpp
@@ -0,0 +1,65 @@
+// This tests that the symmetric transfer at the final suspend point could happen successfully.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+struct Task {
+  struct promise_type {
+    struct FinalAwaiter {
+      bool await_ready() const noexcept { return false; }
+      template <typename PromiseType>
+      std::coroutine_handle<> await_suspend(std::coroutine_handle<PromiseType> h) noexcept {
+        return h.promise().continuation;
+      }
+      void await_resume() noexcept {}
+    };
+    Task get_return_object() noexcept {
+      return std::coroutine_handle<promise_type>::from_promise(*this);
+    }
+    std::suspend_always initial_suspend() noexcept { return {}; }
+    FinalAwaiter final_suspend() noexcept { return {}; }
+    void unhandled_exception() noexcept {}
+    void return_value(int x) noexcept {
+      _value = x;
+    }
+    std::coroutine_handle<> continuation;
+    int _value;
+  };
+
+  Task(std::coroutine_handle<promise_type> handle) : handle(handle), stuff(123) {}
+
+  struct Awaiter {
+    std::coroutine_handle<promise_type> handle;
+    Awaiter(std::coroutine_handle<promise_type> handle) : handle(handle) {}
+    bool await_ready() const noexcept { return false; }
+    std::coroutine_handle<void> await_suspend(std::coroutine_handle<void> continuation) noexcept {
+      handle.promise().continuation = continuation;
+      return handle;
+    }
+    int await_resume() noexcept {
+      int ret = handle.promise()._value;
+      handle.destroy();
+      return ret;
+    }
+  };
+
+  auto operator co_await() {
+    auto handle_ = handle;
+    handle = nullptr;
+    return Awaiter(handle_);
+  }
+
+private:
+  std::coroutine_handle<promise_type> handle;
+  int stuff;
+};
+
+Task task0() {
+  co_return 43;
+}
+
+// CHECK-LABEL: define{{.*}} void @_Z5task0v.resume
+// This checks we are still in the scope of the current function.
+// CHECK-NOT: {{^}}}
+// CHECK: musttail call fastcc void
+// CHECK-NEXT: ret void

>From 9b30f1776f98b6a0bf2cccb051f2bba23a76b539 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 20 Mar 2024 10:42:11 +0100
Subject: [PATCH 5/5] use wouldInstructionBeTriviallyDead() instead

---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index df57f9028e380f..ac6b8f1014dfe9 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1265,7 +1265,7 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
     }
 
     if (I->isDebugOrPseudoInst() || I->isLifetimeStartOrEnd() ||
-        !I->mayHaveSideEffects()) {
+        wouldInstructionBeTriviallyDead(I)) {
       // We can skip instructions without side effects. If their values are
       // needed, we'll notice later, e.g. when hitting a conditional branch.
       I = I->getNextNode();



More information about the cfe-commits mailing list