[clang] 7dccdd7 - [Coroutines] Don't mark the parameter attribute of resume function as noalias

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 12 18:20:00 PST 2022


Author: Chuanqi Xu
Date: 2022-12-13T10:19:24+08:00
New Revision: 7dccdd76b3749508f4a4147b0ba1f7f2689bebb2

URL: https://github.com/llvm/llvm-project/commit/7dccdd76b3749508f4a4147b0ba1f7f2689bebb2
DIFF: https://github.com/llvm/llvm-project/commit/7dccdd76b3749508f4a4147b0ba1f7f2689bebb2.diff

LOG: [Coroutines] Don't mark the parameter attribute of resume function as noalias

Close https://github.com/llvm/llvm-project/issues/59221.

The root cause for the problem is that we marked the parameter of the
resume/destroy functions as noalias previously. But this is not true.

See https://github.com/llvm/llvm-project/issues/59221 for the details.
Long story short, for this C++ program
(https://compiler-explorer.com/z/6qGcozG93), the optimized frame will be
something like:

```
struct test_frame {
    void (*__resume_)(), // a function pointer points to the
`test.resume` function, which can be imaged as the test() function in
the example.
    ....
    struct a_frame {
          ...
          void **caller; // may points to test_frame at runtime.
    };
};
```

And the function a and function test looks just like:

```
define i32 @a(ptr noalias %alloc_8) {
  %alloc_8_16 = getelementptr ptr, ptr %alloc_8, i64 16
  store i32 42, ptr %alloc_8_16, align 8
  %alloc_8_8 = getelementptr ptr, ptr %alloc_8, i64 8
  %alloc = load ptr, ptr %alloc_8_8, align 8
  %p = load ptr, ptr %alloc, align 8
  %r = call i32 %p(ptr %alloc)
  ret i32 %r
}

define i32 @b(ptr %p) {
entry:
  %alloc = alloca [128 x i8], align 8
  %alloc_8 = getelementptr ptr, ptr %alloc, i64 8
  %alloc_8_8 = getelementptr ptr, ptr %alloc_8, i64 8
  store ptr %alloc, ptr %alloc_8_8, align 8
  store ptr %p, ptr %alloc, align 8
  %r = call i32 @a(ptr nonnull %alloc_8)
  ret i32 %r
}
```

Here inside the function `a`,  we can access the parameter `%alloc_8` by
`%alloc` and we pass `%alloc` to an unknown function. So it breaks the
assumption of `noalias` parameter.

Note that although only CoroElide optimization can put a frame inside
another frame directly, the following case is not valid too:

```
struct test_frame {
    ....
   void **a_frame; // may points to a_frame at runtime.
};

 struct a_frame {
    void **caller; // may points to test_frame at runtime.
};
```

Since the C++ language allows the programmer to get the address of
coroutine frames, we can't assume the above case wouldn't happen in the
source codes. So we can't set the parameter as noalias no matter if
CoroElide applies or not. And for other languages, it may be safe if
they don't allow the programmers to get the address of coroutine frames.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D139295

Added: 
    clang/test/CodeGenCoroutines/pr59221.cpp

Modified: 
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp
    llvm/test/Transforms/Coroutines/coro-debug-dbg.addr.ll
    llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll
    llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll
    llvm/test/Transforms/Coroutines/coro-debug.ll
    llvm/test/Transforms/Coroutines/coro-frame.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenCoroutines/pr59221.cpp b/clang/test/CodeGenCoroutines/pr59221.cpp
new file mode 100644
index 000000000000..ae5f6fdbdea9
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/pr59221.cpp
@@ -0,0 +1,88 @@
+// Test for PR59221. Tests the compiler wouldn't misoptimize the final result.
+//
+// REQUIRES: x86-registered-target
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 %s -O3 -S -emit-llvm -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+template <typename T> struct task {
+	struct promise_type {
+		T value{123};
+		std::coroutine_handle<> caller{std::noop_coroutine()};
+		
+		struct final_awaiter: std::suspend_always {
+			auto await_suspend(std::coroutine_handle<promise_type> me) const noexcept {
+				return me.promise().caller;
+			}
+		};
+
+		constexpr auto initial_suspend() const noexcept {
+			return std::suspend_always();
+		}
+		constexpr auto final_suspend() const noexcept {
+			return final_awaiter{};
+		}
+		auto unhandled_exception() noexcept {
+			// ignore
+		}
+		constexpr void return_value(T v) noexcept {
+			value = v;
+		} 
+		constexpr auto & get_return_object() noexcept {
+			return *this;
+		}
+	};
+	
+	using coroutine_handle = std::coroutine_handle<promise_type>;
+	
+	promise_type & promise{nullptr};
+	
+	task(promise_type & p) noexcept: promise{p} { }
+	
+	~task() noexcept {
+		coroutine_handle::from_promise(promise).destroy();
+	}
+	
+	auto await_ready() noexcept {
+        return false;
+    }
+
+    auto await_suspend(std::coroutine_handle<> caller) noexcept {
+        promise.caller = caller;
+        return coroutine_handle::from_promise(promise);
+    }
+
+    constexpr auto await_resume() const noexcept {
+        return promise.value;
+    }
+	
+	// non-coroutine access to result
+	auto get() noexcept {
+		const auto handle = coroutine_handle::from_promise(promise);
+		
+		if (!handle.done()) {
+			handle.resume();
+		}
+
+        return promise.value;
+	}
+};
+
+
+static inline auto a() noexcept -> task<int> {
+	co_return 42;
+}
+
+static inline auto test() noexcept -> task<int> {
+	co_return co_await a();
+}
+
+int foo() {
+	return test().get();
+}
+
+// Checks that the store for the result value 42 is not misoptimized out.
+// CHECK: define{{.*}}_Z3foov(
+// CHECK: store i32 42, ptr %{{.*}}
+// CHECK: }

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 36ede8ebccc6..1171878f749a 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -516,13 +516,6 @@ static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
   Function *NewF =
       Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                        OrigF.getName() + Suffix);
-  if (Shape.ABI != coro::ABI::Async)
-    NewF->addParamAttr(0, Attribute::NonNull);
-
-  // For the async lowering ABI we can't guarantee that the context argument is
-  // not access via a 
diff erent pointer not based on the argument.
-  if (Shape.ABI != coro::ABI::Async)
-    NewF->addParamAttr(0, Attribute::NoAlias);
 
   M->getFunctionList().insert(InsertBefore, NewF);
 
@@ -849,11 +842,15 @@ Value *CoroCloner::deriveNewFramePointer() {
 }
 
 static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
-                                 unsigned ParamIndex,
-                                 uint64_t Size, Align Alignment) {
+                                 unsigned ParamIndex, uint64_t Size,
+                                 Align Alignment, bool NoAlias) {
   AttrBuilder ParamAttrs(Context);
   ParamAttrs.addAttribute(Attribute::NonNull);
-  ParamAttrs.addAttribute(Attribute::NoAlias);
+  ParamAttrs.addAttribute(Attribute::NoUndef);
+
+  if (NoAlias)
+    ParamAttrs.addAttribute(Attribute::NoAlias);
+
   ParamAttrs.addAlignmentAttr(Alignment);
   ParamAttrs.addDereferenceableAttr(Size);
   Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
@@ -959,8 +956,8 @@ void CoroCloner::create() {
     NewAttrs = NewAttrs.addFnAttributes(
         Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));
 
-    addFramePointerAttrs(NewAttrs, Context, 0,
-                         Shape.FrameSize, Shape.FrameAlign);
+    addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
+                         Shape.FrameAlign, /*NoAlias=*/false);
     break;
   case coro::ABI::Async: {
     auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
@@ -989,9 +986,12 @@ void CoroCloner::create() {
     // full-stop.
     NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
 
+    /// FIXME: Is it really good to add the NoAlias attribute?
     addFramePointerAttrs(NewAttrs, Context, 0,
                          Shape.getRetconCoroId()->getStorageSize(),
-                         Shape.getRetconCoroId()->getStorageAlignment());
+                         Shape.getRetconCoroId()->getStorageAlignment(),
+                         /*NoAlias=*/true);
+
     break;
   }
 

diff  --git a/llvm/test/Transforms/Coroutines/coro-debug-dbg.addr.ll b/llvm/test/Transforms/Coroutines/coro-debug-dbg.addr.ll
index 49eabbf03565..54db4473fe39 100644
--- a/llvm/test/Transforms/Coroutines/coro-debug-dbg.addr.ll
+++ b/llvm/test/Transforms/Coroutines/coro-debug-dbg.addr.ll
@@ -8,20 +8,20 @@
 ; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split,coro-split)' -S | FileCheck %s
 
 ; This file is based on coro-debug-frame-variable.ll.
-; CHECK:  define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]]
+; CHECK:  define internal fastcc void @f.resume(%f.Frame* noundef nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]]
 ; CHECK-NEXT:       entry.resume:
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[XVAR_RESUME:[0-9]+]],
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[YVAR_RESUME:[0-9]+]],
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[ZVAR_RESUME:[0-9]+]],
 
-; CHECK:  define internal fastcc void @f.destroy(%f.Frame* noalias nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[DESTROY_FN_DBG_NUM:[0-9]+]] {
+; CHECK:  define internal fastcc void @f.destroy(%f.Frame* noundef nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[DESTROY_FN_DBG_NUM:[0-9]+]] {
 ; CHECK-NEXT:       entry.destroy:
 ; CHECK-NEXT:         %FramePtr.debug = alloca
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[XVAR_DESTROY:[0-9]+]],
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[YVAR_DESTROY:[0-9]+]],
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[ZVAR_DESTROY:[0-9]+]],
 
-; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[CLEANUP_FN_DBG_NUM:[0-9]+]] {
+; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noundef nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[CLEANUP_FN_DBG_NUM:[0-9]+]] {
 ; CHECK:       entry.cleanup:
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[XVAR_CLEANUP:[0-9]+]],
 ; CHECK:         call void @llvm.dbg.addr(metadata %f.Frame** %FramePtr.debug, metadata ![[YVAR_CLEANUP:[0-9]+]],

diff  --git a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll
index 7086cee61367..2fe55b63f5a5 100644
--- a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll
+++ b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split,coro-split)' -S | FileCheck %s
 ;
 ; This file is based on coro-debug-frame-variable.ll.
-; CHECK:  define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]]
+; CHECK:  define internal fastcc void @f.resume(%f.Frame* noundef nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]]
 ; CHECK:       await.ready:
 ; CHECK:         call void @llvm.dbg.value(metadata i32 undef, metadata ![[IVAR_RESUME:[0-9]+]], metadata !DIExpression(
 ; CHECK:         call void @llvm.dbg.value(metadata i32 undef, metadata ![[JVAR_RESUME:[0-9]+]], metadata !DIExpression(

diff  --git a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll
index f4596c21c34f..ac8027420e20 100644
--- a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll
+++ b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split,coro-split)' -S | FileCheck %s
 ;
 ; This file is based on coro-debug-frame-variable.ll.
-; CHECK:  define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]]
+; CHECK:  define internal fastcc void @f.resume(%f.Frame* noundef nonnull align 16 dereferenceable(80) %FramePtr) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]]
 ; CHECK:       init.ready:
 ; CHECK:         call void @llvm.dbg.value(metadata %f.Frame** %FramePtr.debug, metadata ![[XVAR_RESUME:[0-9]+]],
 ; CHECK:       await.ready:

diff  --git a/llvm/test/Transforms/Coroutines/coro-debug.ll b/llvm/test/Transforms/Coroutines/coro-debug.ll
index abb9edc33de8..0716ddba9735 100644
--- a/llvm/test/Transforms/Coroutines/coro-debug.ll
+++ b/llvm/test/Transforms/Coroutines/coro-debug.ll
@@ -172,7 +172,7 @@ attributes #7 = { noduplicate }
 !32 = !DILocalVariable(name: "inline_asm", scope: !6, file: !7, line: 55, type: !11)
 
 ; CHECK: define i8* @f(i32 %x) #0 personality i32 0 !dbg ![[ORIG:[0-9]+]]
-; CHECK: define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 8 dereferenceable(40) %FramePtr) #0 personality i32 0 !dbg ![[RESUME:[0-9]+]]
+; CHECK: define internal fastcc void @f.resume(%f.Frame* noundef nonnull align 8 dereferenceable(40) %FramePtr) #0 personality i32 0 !dbg ![[RESUME:[0-9]+]]
 ; CHECK: entry.resume:
 ; CHECK: %[[DBG_PTR:.*]] = alloca %f.Frame*
 ; CHECK: call void @llvm.dbg.declare(metadata %f.Frame** %[[DBG_PTR]], metadata ![[RESUME_COROHDL:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst,
@@ -194,8 +194,8 @@ attributes #7 = { noduplicate }
 ; CHECK-NEXT: to label %[[DEFAULT_DEST:.+]] [label
 ; CHECK: [[DEFAULT_DEST]]:
 ; CHECK-NEXT: call void @llvm.dbg.declare(metadata i32 %[[CALLBR_RES]]
-; CHECK: define internal fastcc void @f.destroy(%f.Frame* noalias nonnull align 8 dereferenceable(40) %FramePtr) #0 personality i32 0 !dbg ![[DESTROY:[0-9]+]]
-; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull align 8 dereferenceable(40) %FramePtr) #0 personality i32 0 !dbg ![[CLEANUP:[0-9]+]]
+; CHECK: define internal fastcc void @f.destroy(%f.Frame* noundef nonnull align 8 dereferenceable(40) %FramePtr) #0 personality i32 0 !dbg ![[DESTROY:[0-9]+]]
+; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noundef nonnull align 8 dereferenceable(40) %FramePtr) #0 personality i32 0 !dbg ![[CLEANUP:[0-9]+]]
 
 ; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink"
 

diff  --git a/llvm/test/Transforms/Coroutines/coro-frame.ll b/llvm/test/Transforms/Coroutines/coro-frame.ll
index ec35b34dc574..f4c7f90a61ad 100644
--- a/llvm/test/Transforms/Coroutines/coro-frame.ll
+++ b/llvm/test/Transforms/Coroutines/coro-frame.ll
@@ -44,7 +44,7 @@ pad:
 ; CHECK: ret i8* %hdl
 
 ; See if the float was loaded from the frame
-; CHECK-LABEL: @f.resume(%f.Frame* noalias nonnull align 8
+; CHECK-LABEL: @f.resume(%f.Frame* noundef nonnull align 8
 ; CHECK: %r.reload = load double, double* %r.reload.addr
 ; CHECK: call double @print(double %r.reload)
 ; CHECK: ret void


        


More information about the cfe-commits mailing list