[llvm] c8ecf12 - [Coroutines] Offering llvm.coro.align intrinsic

Chuanqi Xu via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 18 17:53:09 PST 2022


Author: Chuanqi Xu
Date: 2022-01-19T09:52:45+08:00
New Revision: c8ecf12bc3e439a7f6525a430db9ad5ac7a7e1c1

URL: https://github.com/llvm/llvm-project/commit/c8ecf12bc3e439a7f6525a430db9ad5ac7a7e1c1
DIFF: https://github.com/llvm/llvm-project/commit/c8ecf12bc3e439a7f6525a430db9ad5ac7a7e1c1.diff

LOG: [Coroutines] Offering llvm.coro.align intrinsic

It is a known problem that we can't align the switch-based coroutine
frame if its alignment exceeds that of std::max_align_t (usually 16).

We could solve the problem either in the middle end, by a dynamic
transformation, or in the frontend, by emitting an aligned allocation function.

If we are to solve it in the frontend, the middle end needs, at the very
least, to offer an intrinsic that reports the alignment. This patch offers
such an intrinsic, called llvm.coro.align.

Reviewed By: https://reviews.llvm.org/D117542

Differential revision: https://reviews.llvm.org/D117542

Added: 
    llvm/test/Transforms/Coroutines/coro-align-01.ll
    llvm/test/Transforms/Coroutines/coro-align-02.ll
    llvm/test/Transforms/Coroutines/coro-align-03.ll
    llvm/test/Transforms/Coroutines/coro-align-04.ll
    llvm/test/Transforms/Coroutines/coro-align-05.ll

Modified: 
    llvm/docs/Coroutines.rst
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/IR/Intrinsics.td
    llvm/lib/Transforms/Coroutines/CoroInstr.h
    llvm/lib/Transforms/Coroutines/CoroInternal.h
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp
    llvm/lib/Transforms/Coroutines/Coroutines.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index dfb3d1b6081d..00e8913124d2 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -948,6 +948,32 @@ Semantics:
 The `coro.size` intrinsic is lowered to a constant representing the size of
 the coroutine frame.
 
+.. _coro.align:
+
+'llvm.coro.align' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+    declare i32 @llvm.coro.align.i32()
+    declare i64 @llvm.coro.align.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.coro.align``' intrinsic returns the alignment of a `coroutine frame`_.
+This is only supported for switched-resume coroutines.
+
+Arguments:
+""""""""""
+
+None
+
+Semantics:
+""""""""""
+
+The `coro.align` intrinsic is lowered to a constant representing the alignment of
+the coroutine frame.
+
 .. _coro.begin:
 
 'llvm.coro.begin' Intrinsic

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 72a7d3cbdd30..4b9ef7c57ffc 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -633,6 +633,7 @@ class TargetTransformInfoImplBase {
     case Intrinsic::coro_end:
     case Intrinsic::coro_frame:
     case Intrinsic::coro_size:
+    case Intrinsic::coro_align:
     case Intrinsic::coro_suspend:
     case Intrinsic::coro_subfn_addr:
       // These intrinsics don't actually represent code after lowering.

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 20731f1d5ce8..3e40bbf39dd4 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1272,6 +1272,7 @@ def int_coro_end_async
 def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
 
 def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
 def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;

diff  --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index bf3d781ba43e..014938c15a0a 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -599,6 +599,18 @@ class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
   }
 };
 
+/// This represents the llvm.coro.align instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst {
+public:
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_align;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
   enum { FrameArg, UnwindArg };
 

diff  --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 179873eb0047..9a17068df3a9 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -104,6 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
   CoroBeginInst *CoroBegin;
   SmallVector<AnyCoroEndInst *, 4> CoroEnds;
   SmallVector<CoroSizeInst *, 2> CoroSizes;
+  SmallVector<CoroAlignInst *, 2> CoroAligns;
   SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
   SmallVector<CallInst*, 2> SwiftErrorOps;
 

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index aadd7fe3dd56..2e4661fa1d0a 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1083,10 +1083,16 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
 }
 
-static void replaceFrameSize(coro::Shape &Shape) {
+static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
     updateAsyncFuncPointerContextSize(Shape);
 
+  for (CoroAlignInst *CA : Shape.CoroAligns) {
+    CA->replaceAllUsesWith(
+        ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
+    CA->eraseFromParent();
+  }
+
   if (Shape.CoroSizes.empty())
     return;
 
@@ -1884,7 +1890,7 @@ static coro::Shape splitCoroutine(Function &F,
 
   simplifySuspendPoints(Shape);
   buildCoroutineFrame(F, Shape);
-  replaceFrameSize(Shape);
+  replaceFrameSizeAndAlignment(Shape);
 
   // If there are no suspend points, no split required, just remove
   // the allocation and deallocation blocks, they are not needed.

diff  --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index fba8b03e44ba..1078aac257d7 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -123,6 +123,7 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
 static bool isCoroutineIntrinsicName(StringRef Name) {
   // NOTE: Must be sorted!
   static const char *const CoroIntrinsics[] = {
+      "llvm.coro.align",
       "llvm.coro.alloc",
       "llvm.coro.async.context.alloc",
       "llvm.coro.async.context.dealloc",
@@ -268,6 +269,9 @@ void coro::Shape::buildFrom(Function &F) {
       case Intrinsic::coro_size:
         CoroSizes.push_back(cast<CoroSizeInst>(II));
         break;
+      case Intrinsic::coro_align:
+        CoroAligns.push_back(cast<CoroAlignInst>(II));
+        break;
       case Intrinsic::coro_frame:
         CoroFrames.push_back(cast<CoroFrameInst>(II));
         break;

diff  --git a/llvm/test/Transforms/Coroutines/coro-align-01.ll b/llvm/test/Transforms/Coroutines/coro-align-01.ll
new file mode 100644
index 000000000000..8ea76ebba716
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-align-01.ll
@@ -0,0 +1,54 @@
+; Tests that the coro.align intrinsic is lowered to the correct alignment.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %x = alloca i64
+  %y = alloca i64
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %align = call i32 @llvm.coro.align.i32()
+  %alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  %x.alias = bitcast i64* %x to i32*
+  call void @capture_call(i32* %x.alias)
+  %y.alias = bitcast i64* %y to i32*
+  call void @nocapture_call(i32* %y.alias)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape.
+; CHECK:        %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1 }
+; CHECK-LABEL:  define i8* @f()
+; CHECK:          %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 32)
+; CHECK-NEXT:     call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.align.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @capture_call(i32*)
+declare void @nocapture_call(i32* nocapture)
+declare noalias i8* @aligned_alloc(i32, i32)
+declare void @free(i8*)

diff  --git a/llvm/test/Transforms/Coroutines/coro-align-02.ll b/llvm/test/Transforms/Coroutines/coro-align-02.ll
new file mode 100644
index 000000000000..89d25810dbe9
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-align-02.ll
@@ -0,0 +1,46 @@
+; Tests that the coro.align intrinsic is lowered to the correct alignment.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %align = call i32 @llvm.coro.align.i32()
+  %alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; There are no allocas in this test, so the frame holds only the two function pointers and the i1 field.
+; CHECK:        %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1 }
+; CHECK-LABEL:  define i8* @f()
+; CHECK:          %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 24)
+; CHECK-NEXT:     call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.align.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @aligned_alloc(i32, i32)
+declare void @free(i8*)

diff  --git a/llvm/test/Transforms/Coroutines/coro-align-03.ll b/llvm/test/Transforms/Coroutines/coro-align-03.ll
new file mode 100644
index 000000000000..a3c48b8e1320
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-align-03.ll
@@ -0,0 +1,54 @@
+; Tests that the coro.align intrinsic is lowered to the correct alignment.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %x = alloca i64, align 16
+  %y = alloca i64
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %align = call i32 @llvm.coro.align.i32()
+  %alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  %x.alias = bitcast i64* %x to i32*
+  call void @capture_call(i32* %x.alias)
+  %y.alias = bitcast i64* %y to i32*
+  call void @capture_call(i32* %y.alias)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; Both %x (align 16) and %y escape via @capture_call, so both go to the frame; the expected alignment is 16.
+; CHECK:        %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }
+; CHECK-LABEL:  define i8* @f()
+; CHECK:          %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 16, i32 40)
+; CHECK-NEXT:     call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.align.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @capture_call(i32*)
+declare void @nocapture_call(i32* nocapture)
+declare noalias i8* @aligned_alloc(i32, i32)
+declare void @free(i8*)

diff  --git a/llvm/test/Transforms/Coroutines/coro-align-04.ll b/llvm/test/Transforms/Coroutines/coro-align-04.ll
new file mode 100644
index 000000000000..cc38bb284258
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-align-04.ll
@@ -0,0 +1,54 @@
+; Tests that the coro.align intrinsic is lowered to the correct alignment.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %x = alloca i1, align 64
+  %y = alloca i64
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %align = call i32 @llvm.coro.align.i32()
+  %alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  %x.alias = bitcast i1* %x to i32*
+  call void @capture_call(i32* %x.alias)
+  %y.alias = bitcast i64* %y to i32*
+  call void @capture_call(i32* %y.alias)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; Both %x (align 64) and %y escape via @capture_call, so both go to the frame; the expected alignment is 64.
+; CHECK:        %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, [39 x i8], i1 }
+; CHECK-LABEL:  define i8* @f()
+; CHECK:          %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
+; CHECK-NEXT:     call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.align.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @capture_call(i32*)
+declare void @nocapture_call(i32* nocapture)
+declare noalias i8* @aligned_alloc(i32, i32)
+declare void @free(i8*)

diff  --git a/llvm/test/Transforms/Coroutines/coro-align-05.ll b/llvm/test/Transforms/Coroutines/coro-align-05.ll
new file mode 100644
index 000000000000..bbf60622c8b3
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-align-05.ll
@@ -0,0 +1,54 @@
+; Tests that the coro.align intrinsic is lowered to the correct alignment.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %x = alloca i1, align 64
+  %y = alloca i64, align 32
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %align = call i32 @llvm.coro.align.i32()
+  %alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  %x.alias = bitcast i1* %x to i32*
+  call void @capture_call(i32* %x.alias)
+  %y.alias = bitcast i64* %y to i32*
+  call void @capture_call(i32* %y.alias)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; Both %x (align 64) and %y (align 32) escape via @capture_call, so both go to the frame; the expected alignment is the larger one, 64.
+; CHECK:        %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, [15 x i8], i64, [24 x i8], i1 }
+; CHECK-LABEL:  define i8* @f()
+; CHECK:          %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
+; CHECK-NEXT:     call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.align.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @capture_call(i32*)
+declare void @nocapture_call(i32* nocapture)
+declare noalias i8* @aligned_alloc(i32, i32)
+declare void @free(i8*)


        


More information about the llvm-commits mailing list