[clang] 7037331 - [Coroutines] [CoroElide] Don't think exceptional terminator don't leak coro handle unconditionally any more

Wed Aug 23 01:52:30 PDT 2023

Author: Chuanqi Xu
Date: 2023-08-23T16:51:53+08:00
New Revision: 7037331a2f05990cd59f35a7c0f6ce87c0f3cb5f

URL: https://github.com/llvm/llvm-project/commit/7037331a2f05990cd59f35a7c0f6ce87c0f3cb5f
DIFF: https://github.com/llvm/llvm-project/commit/7037331a2f05990cd59f35a7c0f6ce87c0f3cb5f.diff

LOG: [Coroutines] [CoroElide] Don't think exceptional terminator don't leak coro handle unconditionally any more

Close https://github.com/llvm/llvm-project/issues/59723.

The fundamental cause of the above issue is that we assumed the memory
of the coroutine frame can be released automatically by stack unwinding
if the allocation of the coroutine frame is elided. But we missed one
point: stack unwinding has different semantics from an explicit
coroutine_handle<>::destroy(). The latter is explicit, so it shows the
intention of the user: if a use-after-free happens, we can blame the
user for destroying the coroutine frame incorrectly. But we can't do so
with stack unwinding.

So after this patch, we no longer assume unconditionally that an
exceptional terminator doesn't leak the coroutine handle. Instead, we
consider that the exceptional terminator leaks the coroutine handle too
if the coroutine handle is leaked somewhere along the search path.

Concretely, for C++, the exceptional terminator is no longer special.
This may cause some performance regressions, but I've tested the
motivating example (std::generator). On the other hand, coroutine
elision is a middle-end optimization and not a language feature, so we
don't think such regressions should be blamed, especially since we are
correcting a miscompilation.

Added: 
    clang/test/CodeGenCoroutines/pr59723.cpp

Modified: 
    clang/test/CodeGenCoroutines/coro-halo.cpp
    llvm/lib/Transforms/Coroutines/CoroElide.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenCoroutines/coro-halo.cpp b/clang/test/CodeGenCoroutines/coro-halo.cpp
index 6244f130b7be23..e75bedaf81fa22 100644

--- a/clang/test/CodeGenCoroutines/coro-halo.cpp
+++ b/clang/test/CodeGenCoroutines/coro-halo.cpp
@@ -1,5 +1,7 @@
 // This tests that the coroutine heap allocation elision optimization could happen succesfully.
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s \
+// RUN:   -fcxx-exceptions -fexceptions -o - | FileCheck %s
 
 #include "Inputs/coroutine.h"
 #include "Inputs/numeric.h"

diff  --git a/clang/test/CodeGenCoroutines/pr59723.cpp b/clang/test/CodeGenCoroutines/pr59723.cpp
new file mode 100644
index 00000000000000..7fc9995f417ac3
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/pr59723.cpp
@@ -0,0 +1,237 @@
+// This is reduced test case from https://github.com/llvm/llvm-project/issues/59723.
+// This is not a minimal reproducer intentionally to check the compiler's ability.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fcxx-exceptions\
+// RUN:     -fexceptions -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+// executor and operation base
+
+class bug_any_executor;
+
+struct bug_async_op_base
+{
+	void invoke();
+
+protected:
+
+	~bug_async_op_base() = default;
+};
+
+class bug_any_executor
+{
+	using op_type = bug_async_op_base;
+
+public:
+
+	virtual ~bug_any_executor() = default;
+
+	// removing noexcept enables clang to find that the pointer has escaped
+	virtual void post(op_type& op) noexcept = 0;
+
+	virtual void wait() noexcept = 0;
+};
+
+class bug_thread_executor : public bug_any_executor
+{
+
+public:
+
+	void start()
+	{
+		
+	}
+
+	~bug_thread_executor()
+	{
+	}
+
+	// although this implementation is not realy noexcept due to allocation but I have a real one that is and required to be noexcept
+	virtual void post(bug_async_op_base& op) noexcept override;
+
+	virtual void wait() noexcept override
+	{
+		
+	}
+};
+
+// task and promise
+
+struct bug_final_suspend_notification
+{
+	virtual std::coroutine_handle<> get_waiter() = 0;
+};
+
+class bug_task;
+
+class bug_task_promise
+{
+	friend bug_task;
+public:
+
+	bug_task get_return_object() noexcept;
+
+	constexpr std::suspend_always initial_suspend() noexcept { return {}; }
+
+	std::suspend_always final_suspend() noexcept 
+	{
+		return {};
+	}
+
+	void unhandled_exception() noexcept;
+
+	constexpr void return_void() const noexcept {}
+
+	void get_result() const
+	{
+		
+	}
+};
+
+template <class T, class U>
+T exchange(T &&t, U &&u) {
+    T ret = t;
+    t = u;
+    return ret;
+}
+
+class bug_task
+{
+	friend bug_task_promise;
+	using handle = std::coroutine_handle<>;
+	using promise_t = bug_task_promise;
+
+	bug_task(handle coro, promise_t* p) noexcept : this_coro{ coro }, this_promise{ p }
+	{
+	
+	}
+
+public:
+	using promise_type = bug_task_promise;
+
+    bug_task(bug_task&& other) noexcept
+		: this_coro{ exchange(other.this_coro, nullptr) }, this_promise{ exchange(other.this_promise, nullptr) } { 
+		
+	}
+
+	~bug_task()
+	{
+		if (this_coro)
+			this_coro.destroy();
+	}
+
+	constexpr bool await_ready() const noexcept
+	{
+		return false;
+	}
+
+	handle await_suspend(handle waiter) noexcept
+	{
+		return this_coro;
+	}
+
+	void await_resume() 
+	{
+		return this_promise->get_result();
+	}
+
+	handle this_coro;
+	promise_t* this_promise;
+};
+
+bug_task bug_task_promise::get_return_object() noexcept
+{
+	return { std::coroutine_handle<bug_task_promise>::from_promise(*this), this };
+}
+
+// spawn operation and spawner
+
+template<class Handler>
+class bug_spawn_op final : public bug_async_op_base, bug_final_suspend_notification
+{
+	Handler handler;
+	bug_task task_;
+
+public:
+
+	bug_spawn_op(Handler handler, bug_task&& t)
+		: handler { handler }, task_{ static_cast<bug_task&&>(t) } {}
+
+	virtual std::coroutine_handle<> get_waiter() override
+	{
+		handler();
+		return std::noop_coroutine();
+	}
+};
+
+class bug_spawner;
+
+struct bug_spawner_awaiter
+{
+	bug_spawner& s;
+	std::coroutine_handle<> waiter;
+
+	bug_spawner_awaiter(bug_spawner& s) : s{ s } {}
+
+	bool await_ready() const noexcept;
+
+	void await_suspend(std::coroutine_handle<> coro);
+
+	void await_resume() {}
+};
+
+class bug_spawner
+{
+	friend bug_spawner_awaiter;
+
+	struct final_handler_t
+	{
+		bug_spawner& s;
+
+		void operator()()
+		{
+			s.awaiter_->waiter.resume();
+		}
+	};
+
+public:
+
+	bug_spawner(bug_any_executor& ex) : ex_{ ex } {}
+
+	void spawn(bug_task&& t) {
+		using op_t = bug_spawn_op<final_handler_t>;
+		// move task into ptr
+		op_t* ptr = new op_t(final_handler_t{ *this }, static_cast<bug_task&&>(t));
+		++count_;
+		ex_.post(*ptr); // ptr escapes here thus task escapes but clang can't deduce that unless post() is not noexcept
+	}
+
+	bug_spawner_awaiter wait() noexcept { return { *this }; }
+
+private:
+	bug_any_executor& ex_; // if bug_thread_executor& is used instead enables clang to detect the escape of the promise
+	bug_spawner_awaiter* awaiter_ = nullptr;
+	unsigned count_ = 0;
+};
+
+// test case
+
+bug_task bug_spawned_task(int id, int inc)
+{
+	co_return;
+}
+
+struct A {
+    A();
+};
+
+void throwing_fn(bug_spawner& s) {
+	s.spawn(bug_spawned_task(1, 2));
+    throw A{};
+}
+
+// Check that the coroutine frame of bug_spawned_task are allocated from operator new.
+// CHECK: define{{.*}}@_Z11throwing_fnR11bug_spawner
+// CHECK-NOT: alloc
+// CHECK: %[[CALL:.+]] = {{.*}}@_Znwm(i64{{.*}} 24)
+// CHECK: store ptr @_Z16bug_spawned_taskii.resume, ptr %[[CALL]]

diff  --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index d78ab1c1ea2842..d0606c15f3d5b2 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
   for (auto *DA : It->second)
     Visited.insert(DA->getParent());
 
+  SmallPtrSet<const BasicBlock *, 32> EscapingBBs;
+  for (auto *U : CB->users()) {
+    // The use from coroutine intrinsics are not a problem.
+    if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
+      continue;
+
+    // Think all other usages may be an escaping candidate conservatively.
+    //
+    // Note that the major user of switch ABI coroutine (the C++) will store
+    // resume.fn, destroy.fn and the index to the coroutine frame immediately.
+    // So the parent of the coro.begin in C++ will be always escaping.
+    // Then we can't get any performance benefits for C++ by improving the
+    // precision of the method.
+    //
+    // The reason why we still judge it is we want to make LLVM Coroutine in
+    // switch ABIs to be self contained as much as possible instead of a
+    // by-product of C++20 Coroutines.
+    EscapingBBs.insert(cast<Instruction>(U)->getParent());
+  }
+
+  bool PotentiallyEscaped = false;
+
   do {
     const auto *BB = Worklist.pop_back_val();
     if (!Visited.insert(BB).second)
       continue;
-    if (TIs.count(BB))
-      return true;
+
+    // A Path insensitive marker to test whether the coro.begin escapes.
+    // It is intentional to make it path insensitive while it may not be
+    // precise since we don't want the process to be too slow.
+    PotentiallyEscaped |= EscapingBBs.count(BB);
+
+    if (TIs.count(BB)) {
+      if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped)
+        return true;
+
+      // If the function ends with the exceptional terminator, the memory used
+      // by the coroutine frame can be released by stack unwinding
+      // automatically. So we can think the coro.begin doesn't escape if it
+      // exits the function by exceptional terminator.
+
+      continue;
+    }
 
     // Conservatively say that there is potentially a path.
     if (!--Limit)
@@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // memory location storing that value and not the virtual register.
 
   SmallPtrSet<BasicBlock *, 8> Terminators;
-  // First gather all of the non-exceptional terminators for the function.
+  // First gather all of the terminators for the function.
   // Consider the final coro.suspend as the real terminator when the current
   // function is a coroutine.
-    for (BasicBlock &B : *F) {
-      auto *TI = B.getTerminator();
-      if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
-          !isa<UnreachableInst>(TI))
-        Terminators.insert(&B);
-    }
+  for (BasicBlock &B : *F) {
+    auto *TI = B.getTerminator();
+
+    if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI))
+      continue;
+
+    Terminators.insert(&B);
+  }
 
   // Filter out the coro.destroy that lie along exceptional paths.
   SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
   for (const auto &It : DestroyAddr) {
-    // If there is any coro.destroy dominates all of the terminators for the
-    // coro.begin, we could know the corresponding coro.begin wouldn't escape.
-    for (Instruction *DA : It.second) {
-      if (llvm::all_of(Terminators, [&](auto *TI) {
-            return DT.dominates(DA, TI->getTerminator());
-          })) {
-        ReferencedCoroBegins.insert(It.first);
-        break;
-      }
-    }
-
-    // Whether there is any paths from coro.begin to Terminators which not pass
-    // through any of the coro.destroys.
+    // If every terminators is dominated by coro.destroy, we could know the
+    // corresponding coro.begin wouldn't escape.
+    //
+    // Otherwise hasEscapePath would decide whether there is any paths from
+    // coro.begin to Terminators which not pass through any of the
+    // coro.destroys.
     //
     // hasEscapePath is relatively slow, so we avoid to run it as much as
     // possible.
-    if (!ReferencedCoroBegins.count(It.first) &&
+    if (llvm::all_of(Terminators,
+                     [&](auto *TI) {
+                       return llvm::any_of(It.second, [&](auto *DA) {
+                         return DT.dominates(DA, TI->getTerminator());
+                       });
+                     }) ||
         !hasEscapePath(It.first, Terminators))
       ReferencedCoroBegins.insert(It.first);
   }