[llvm] 5c902af - [coro async] Add code to support dynamic alignment of over-aligned types in async frames

Arnold Schwaighofer via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 3 07:07:07 PDT 2022


Author: Arnold Schwaighofer
Date: 2022-06-03T07:06:14-07:00
New Revision: 5c902af572bf8670fc0bb9d5709f2f0447339033

URL: https://github.com/llvm/llvm-project/commit/5c902af572bf8670fc0bb9d5709f2f0447339033
DIFF: https://github.com/llvm/llvm-project/commit/5c902af572bf8670fc0bb9d5709f2f0447339033.diff

LOG: [coro async] Add code to support dynamic alignment of over-aligned types in async frames

Async context frames are allocated with a maximum alignment. If a type
requests an alignment bigger than that maximum, dynamically align the
address in the frame.

Differential Revision: https://reviews.llvm.org/D126715

Added: 
    llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll

Modified: 
    llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 72869f7e013c2..32241f318be88 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -351,6 +351,17 @@ struct FrameDataInfo {
     FieldAlignMap.insert({V, Align});
   }
 
+  uint64_t getDynamicAlign(Value *V) const {
+    auto Iter = FieldDynamicAlignMap.find(V);
+    assert(Iter != FieldDynamicAlignMap.end());
+    return Iter->second;
+  }
+
+  void setDynamicAlign(Value *V, uint64_t Align) {
+    assert(FieldDynamicAlignMap.count(V) == 0);
+    FieldDynamicAlignMap.insert({V, Align});
+  }
+
   uint64_t getOffset(Value *V) const {
     auto Iter = FieldOffsetMap.find(V);
     assert(Iter != FieldOffsetMap.end());
@@ -376,6 +387,7 @@ struct FrameDataInfo {
   // Map from values to their alignment on the frame. They would be set after
   // the frame is built.
   DenseMap<Value *, uint64_t> FieldAlignMap;
+  DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
   // Map from values to their offset on the frame. They would be set after
   // the frame is built.
   DenseMap<Value *, uint64_t> FieldOffsetMap;
@@ -416,6 +428,7 @@ class FrameTypeBuilder {
     FieldIDType LayoutFieldIndex;
     Align Alignment;
     Align TyAlignment;
+    uint64_t DynamicAlignBuffer;
   };
 
   const DataLayout &DL;
@@ -510,6 +523,18 @@ class FrameTypeBuilder {
       FieldAlignment = TyAlignment;
     }
 
+    // The field alignment could be bigger than the max frame alignment; in
+    // that case we request additional storage to be able to dynamically align
+    // the pointer.
+    uint64_t DynamicAlignBuffer = 0;
+    if (MaxFrameAlignment &&
+        (FieldAlignment.valueOrOne() > *MaxFrameAlignment)) {
+      DynamicAlignBuffer =
+          offsetToAlignment((*MaxFrameAlignment).value(), *FieldAlignment);
+      FieldAlignment = *MaxFrameAlignment;
+      FieldSize = FieldSize + DynamicAlignBuffer;
+    }
+
     // Lay out header fields immediately.
     uint64_t Offset;
     if (IsHeader) {
@@ -521,7 +546,8 @@ class FrameTypeBuilder {
       Offset = OptimizedStructLayoutField::FlexibleOffset;
     }
 
-    Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
+    Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment,
+                      DynamicAlignBuffer});
     return Fields.size() - 1;
   }
 
@@ -555,6 +581,11 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
     auto Field = B.getLayoutField(getFieldIndex(I));
     setFieldIndex(I, Field.LayoutFieldIndex);
     setAlign(I, Field.Alignment.value());
+    uint64_t dynamicAlign =
+        Field.DynamicAlignBuffer
+            ? Field.DynamicAlignBuffer + Field.Alignment.value()
+            : 0;
+    setDynamicAlign(I, dynamicAlign);
     setOffset(I, Field.Offset);
   };
   LayoutIndexUpdateStarted = true;
@@ -748,6 +779,10 @@ void FrameTypeBuilder::finish(StructType *Ty) {
     F.LayoutFieldIndex = FieldTypes.size();
 
     FieldTypes.push_back(F.Ty);
+    if (F.DynamicAlignBuffer) {
+      FieldTypes.push_back(
+          ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
+    }
     LastOffset = Offset + F.Size;
   }
 
@@ -1543,7 +1578,17 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
 
     auto GEP = cast<GetElementPtrInst>(
         Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
-    if (isa<AllocaInst>(Orig)) {
+    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+      if (FrameData.getDynamicAlign(Orig) != 0) {
+        assert(FrameData.getDynamicAlign(Orig) == AI->getAlignment());
+        auto *M = AI->getModule();
+        auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
+        auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
+        auto *AlignMask = ConstantInt::get(IntPtrTy, AI->getAlignment() - 1);
+        PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
+        PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
+        return Builder.CreateIntToPtr(PtrValue, AI->getType());
+      }
       // If the type of GEP is not equal to the type of AllocaInst, it implies
       // that the AllocaInst may be reused in the Frame slot of other
       // AllocaInst. So We cast GEP to the AllocaInst here to re-use

diff  --git a/llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll b/llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll
new file mode 100644
index 0000000000000..94d23bbdacbea
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s  -O0 -S | FileCheck  %s
+
+target datalayout = "p:64:64:64"
+
+%async.task = type { i64 }
+%async.actor = type { i64 }
+%async.fp = type <{ i32, i32 }>
+
+%async.ctxt = type { i8*, void (i8*)* }
+
+@my_other_async_function_fp = external global <{ i32, i32 }>
+declare void @my_other_async_function(i8* %async.ctxt)
+
+@my_async_function_fp = constant <{ i32, i32 }>
+  <{ i32 trunc (
+       i64 sub (
+         i64 ptrtoint (void (i8*)* @my_async_function to i64),
+         i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64)
+       )
+     to i32),
+     i32 32
+}>
+
+declare void @opaque(i64*)
+declare i8* @llvm.coro.async.context.alloc(i8*, i8*)
+declare void @llvm.coro.async.context.dealloc(i8*)
+declare i8* @llvm.coro.async.resume()
+declare token @llvm.coro.id.async(i32, i32, i32, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end.async(i8*, i1, ...)
+declare i1 @llvm.coro.end(i8*, i1)
+declare swiftcc void @asyncReturn(i8*)
+declare swiftcc void @asyncSuspend(i8*)
+declare {i8*} @llvm.coro.suspend.async(i32, i8*, i8*, ...)
+
+define swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* %fnPtr, i8* %async.ctxt) {
+  %callee = bitcast i8* %fnPtr to void(i8*)*
+  tail call swiftcc void %callee(i8* %async.ctxt)
+  ret void
+}
+
+define i8* @__swift_async_resume_project_context(i8* %ctxt) {
+entry:
+  %resume_ctxt_addr = bitcast i8* %ctxt to i8**
+  %resume_ctxt = load i8*, i8** %resume_ctxt_addr, align 8
+  ret i8* %resume_ctxt
+}
+
+
+; CHECK: %my_async_function.Frame = type { i64, [48 x i8], i64, i64, [16 x i8], i8*, i64, i8* }
+; CHECK: define swiftcc void @my_async_function
+; CHECK:  [[T0:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 3
+; CHECK:  [[T1:%.*]] = ptrtoint i64* [[T0]] to i64
+; CHECK:  [[T2:%.*]] = add i64 [[T1]], 31
+; CHECK:  [[T3:%.*]] = and i64 [[T2]], -32
+; CHECK:  [[T4:%.*]] = inttoptr i64 [[T3]] to i64*
+; CHECK:  [[T5:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 0
+; CHECK:  [[T6:%.*]] = ptrtoint i64* [[T5]] to i64
+; CHECK:  [[T7:%.*]] = add i64 [[T6]], 63
+; CHECK:  [[T8:%.*]] = and i64 [[T7]], -64
+; CHECK:  [[T9:%.*]] = inttoptr i64 [[T8]] to i64*
+; CHECK:  store i64 2, i64* [[T4]]
+; CHECK:  store i64 3, i64* [[T9]]
+
+define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) "coroutine.presplit"="1" {
+entry:
+  %tmp = alloca i64, align 8
+  %tmp2 = alloca i64, align 16
+  %tmp3 = alloca i64, align 32
+  %tmp4 = alloca i64, align 64
+
+  %id = call token @llvm.coro.id.async(i32 32, i32 16, i32 0,
+          i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  store i64 0, i64* %tmp
+  store i64 1, i64* %tmp2
+  store i64 2, i64* %tmp3
+  store i64 3, i64* %tmp4
+
+  %callee_context = call i8* @llvm.coro.async.context.alloc(i8* null, i8* null)
+	%callee_context.0 = bitcast i8* %callee_context to %async.ctxt*
+  %callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1
+  %return_to_caller.addr = bitcast void(i8*)** %callee_context.return_to_caller.addr to i8**
+  %resume.func_ptr = call i8* @llvm.coro.async.resume()
+  store i8* %resume.func_ptr, i8** %return_to_caller.addr
+
+  %callee = bitcast void(i8*)* @asyncSuspend to i8*
+  %resume_proj_fun = bitcast i8*(i8*)* @__swift_async_resume_project_context to i8*
+  %res = call {i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
+                                                  i8* %resume.func_ptr,
+                                                  i8* %resume_proj_fun,
+                                                  void (i8*, i8*)* @my_async_function.my_other_async_function_fp.apply,
+                                                  i8* %callee, i8* %callee_context)
+  call void @opaque(i64* %tmp)
+  call void @opaque(i64* %tmp2)
+  call void @opaque(i64* %tmp3)
+  call void @opaque(i64* %tmp4)
+  call void @llvm.coro.async.context.dealloc(i8* %callee_context)
+  tail call swiftcc void @asyncReturn(i8* %async.ctxt)
+  call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
+  unreachable
+}


        


More information about the llvm-commits mailing list