[llvm] 5c902af - [coro async] Add code to support dynamic alignment of over-aligned types in async frames
Arnold Schwaighofer via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 3 07:07:07 PDT 2022
Author: Arnold Schwaighofer
Date: 2022-06-03T07:06:14-07:00
New Revision: 5c902af572bf8670fc0bb9d5709f2f0447339033
URL: https://github.com/llvm/llvm-project/commit/5c902af572bf8670fc0bb9d5709f2f0447339033
DIFF: https://github.com/llvm/llvm-project/commit/5c902af572bf8670fc0bb9d5709f2f0447339033.diff
LOG: [coro async] Add code to support dynamic alignment of over-aligned types in async frames
Async context frames are allocated with a maximum alignment. If a type
requests an alignment bigger than that, dynamically align its address in
the frame.
Differential Revision: https://reviews.llvm.org/D126715
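For readers skimming the patch: the runtime fix-up emitted in the insertSpills
hunk below is the usual round-up-to-a-power-of-two-alignment arithmetic
(ptrtoint, add align-1, mask, inttoptr). A minimal standalone C++ sketch of
that computation follows; the helper name alignAddressUp is hypothetical and
only mirrors the IR sequence in the diff, it is not part of the change itself:

#include <cassert>
#include <cstdint>

// Round Addr up to the next multiple of Align (Align must be a power of two).
// Mirrors the ptrtoint/add/and/inttoptr sequence emitted in insertSpills.
static uint64_t alignAddressUp(uint64_t Addr, uint64_t Align) {
  uint64_t Mask = Align - 1;
  return (Addr + Mask) & ~Mask;
}

int main() {
  // From any 16-byte-aligned frame address, at most 16 extra bytes are needed
  // to reach a 32-byte boundary, and at most 48 to reach a 64-byte boundary.
  assert(alignAddressUp(16, 32) == 32);
  assert(alignAddressUp(80, 64) == 128);
  return 0;
}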
Added:
llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll
Modified:
llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 72869f7e013c2..32241f318be88 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -351,6 +351,17 @@ struct FrameDataInfo {
FieldAlignMap.insert({V, Align});
}
+ uint64_t getDynamicAlign(Value *V) const {
+ auto Iter = FieldDynamicAlignMap.find(V);
+ assert(Iter != FieldDynamicAlignMap.end());
+ return Iter->second;
+ }
+
+ void setDynamicAlign(Value *V, uint64_t Align) {
+ assert(FieldDynamicAlignMap.count(V) == 0);
+ FieldDynamicAlignMap.insert({V, Align});
+ }
+
uint64_t getOffset(Value *V) const {
auto Iter = FieldOffsetMap.find(V);
assert(Iter != FieldOffsetMap.end());
@@ -376,6 +387,7 @@ struct FrameDataInfo {
// Map from values to their alignment on the frame. They would be set after
// the frame is built.
DenseMap<Value *, uint64_t> FieldAlignMap;
+ DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
// Map from values to their offset on the frame. They would be set after
// the frame is built.
DenseMap<Value *, uint64_t> FieldOffsetMap;
@@ -416,6 +428,7 @@ class FrameTypeBuilder {
FieldIDType LayoutFieldIndex;
Align Alignment;
Align TyAlignment;
+ uint64_t DynamicAlignBuffer;
};
const DataLayout &DL;
@@ -510,6 +523,18 @@ class FrameTypeBuilder {
FieldAlignment = TyAlignment;
}
+ // The field alignment could be bigger than the max frame alignment; in that
+ // case we request additional storage to be able to dynamically align the
+ // pointer.
+ uint64_t DynamicAlignBuffer = 0;
+ if (MaxFrameAlignment &&
+ (FieldAlignment.valueOrOne() > *MaxFrameAlignment)) {
+ DynamicAlignBuffer =
+ offsetToAlignment((*MaxFrameAlignment).value(), *FieldAlignment);
+ FieldAlignment = *MaxFrameAlignment;
+ FieldSize = FieldSize + DynamicAlignBuffer;
+ }
+
// Lay out header fields immediately.
uint64_t Offset;
if (IsHeader) {
@@ -521,7 +546,8 @@ class FrameTypeBuilder {
Offset = OptimizedStructLayoutField::FlexibleOffset;
}
- Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
+ Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment,
+ DynamicAlignBuffer});
return Fields.size() - 1;
}
@@ -555,6 +581,11 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
auto Field = B.getLayoutField(getFieldIndex(I));
setFieldIndex(I, Field.LayoutFieldIndex);
setAlign(I, Field.Alignment.value());
+ uint64_t dynamicAlign =
+ Field.DynamicAlignBuffer
+ ? Field.DynamicAlignBuffer + Field.Alignment.value()
+ : 0;
+ setDynamicAlign(I, dynamicAlign);
setOffset(I, Field.Offset);
};
LayoutIndexUpdateStarted = true;
@@ -748,6 +779,10 @@ void FrameTypeBuilder::finish(StructType *Ty) {
F.LayoutFieldIndex = FieldTypes.size();
FieldTypes.push_back(F.Ty);
+ if (F.DynamicAlignBuffer) {
+ FieldTypes.push_back(
+ ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
+ }
LastOffset = Offset + F.Size;
}
@@ -1543,7 +1578,17 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto GEP = cast<GetElementPtrInst>(
Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
- if (isa<AllocaInst>(Orig)) {
+ if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+ if (FrameData.getDynamicAlign(Orig) != 0) {
+ assert(FrameData.getDynamicAlign(Orig) == AI->getAlignment());
+ auto *M = AI->getModule();
+ auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
+ auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
+ auto *AlignMask = ConstantInt::get(IntPtrTy, AI->getAlignment() - 1);
+ PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
+ PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
+ return Builder.CreateIntToPtr(PtrValue, AI->getType());
+ }
// If the type of GEP is not equal to the type of AllocaInst, it implies
// that the AllocaInst may be reused in the Frame slot of other
// AllocaInst. So We cast GEP to the AllocaInst here to re-use
diff --git a/llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll b/llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll
new file mode 100644
index 0000000000000..94d23bbdacbea
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -O0 -S | FileCheck %s
+
+target datalayout = "p:64:64:64"
+
+%async.task = type { i64 }
+%async.actor = type { i64 }
+%async.fp = type <{ i32, i32 }>
+
+%async.ctxt = type { i8*, void (i8*)* }
+
+@my_other_async_function_fp = external global <{ i32, i32 }>
+declare void @my_other_async_function(i8* %async.ctxt)
+
+@my_async_function_fp = constant <{ i32, i32 }>
+ <{ i32 trunc (
+ i64 sub (
+ i64 ptrtoint (void (i8*)* @my_async_function to i64),
+ i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64)
+ )
+ to i32),
+ i32 32
+}>
+
+declare void @opaque(i64*)
+declare i8* @llvm.coro.async.context.alloc(i8*, i8*)
+declare void @llvm.coro.async.context.dealloc(i8*)
+declare i8* @llvm.coro.async.resume()
+declare token @llvm.coro.id.async(i32, i32, i32, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end.async(i8*, i1, ...)
+declare i1 @llvm.coro.end(i8*, i1)
+declare swiftcc void @asyncReturn(i8*)
+declare swiftcc void @asyncSuspend(i8*)
+declare {i8*} @llvm.coro.suspend.async(i32, i8*, i8*, ...)
+
+define swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* %fnPtr, i8* %async.ctxt) {
+ %callee = bitcast i8* %fnPtr to void(i8*)*
+ tail call swiftcc void %callee(i8* %async.ctxt)
+ ret void
+}
+
+define i8* @__swift_async_resume_project_context(i8* %ctxt) {
+entry:
+ %resume_ctxt_addr = bitcast i8* %ctxt to i8**
+ %resume_ctxt = load i8*, i8** %resume_ctxt_addr, align 8
+ ret i8* %resume_ctxt
+}
+
+
+; CHECK: %my_async_function.Frame = type { i64, [48 x i8], i64, i64, [16 x i8], i8*, i64, i8* }
+; CHECK: define swiftcc void @my_async_function
+; CHECK: [[T0:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 3
+; CHECK: [[T1:%.*]] = ptrtoint i64* [[T0]] to i64
+; CHECK: [[T2:%.*]] = add i64 [[T1]], 31
+; CHECK: [[T3:%.*]] = and i64 [[T2]], -32
+; CHECK: [[T4:%.*]] = inttoptr i64 [[T3]] to i64*
+; CHECK: [[T5:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 0
+; CHECK: [[T6:%.*]] = ptrtoint i64* [[T5]] to i64
+; CHECK: [[T7:%.*]] = add i64 [[T6]], 63
+; CHECK: [[T8:%.*]] = and i64 [[T7]], -64
+; CHECK: [[T9:%.*]] = inttoptr i64 [[T8]] to i64*
+; CHECK: store i64 2, i64* [[T4]]
+; CHECK: store i64 3, i64* [[T9]]
+
+define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) "coroutine.presplit"="1" {
+entry:
+ %tmp = alloca i64, align 8
+ %tmp2 = alloca i64, align 16
+ %tmp3 = alloca i64, align 32
+ %tmp4 = alloca i64, align 64
+
+ %id = call token @llvm.coro.id.async(i32 32, i32 16, i32 0,
+ i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*))
+ %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+ store i64 0, i64* %tmp
+ store i64 1, i64* %tmp2
+ store i64 2, i64* %tmp3
+ store i64 3, i64* %tmp4
+
+ %callee_context = call i8* @llvm.coro.async.context.alloc(i8* null, i8* null)
+ %callee_context.0 = bitcast i8* %callee_context to %async.ctxt*
+ %callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1
+ %return_to_caller.addr = bitcast void(i8*)** %callee_context.return_to_caller.addr to i8**
+ %resume.func_ptr = call i8* @llvm.coro.async.resume()
+ store i8* %resume.func_ptr, i8** %return_to_caller.addr
+
+ %callee = bitcast void(i8*)* @asyncSuspend to i8*
+ %resume_proj_fun = bitcast i8*(i8*)* @__swift_async_resume_project_context to i8*
+ %res = call {i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
+ i8* %resume.func_ptr,
+ i8* %resume_proj_fun,
+ void (i8*, i8*)* @my_async_function.my_other_async_function_fp.apply,
+ i8* %callee, i8* %callee_context)
+ call void @opaque(i64* %tmp)
+ call void @opaque(i64* %tmp2)
+ call void @opaque(i64* %tmp3)
+ call void @opaque(i64* %tmp4)
+ call void @llvm.coro.async.context.dealloc(i8* %callee_context)
+ tail call swiftcc void @asyncReturn(i8* %async.ctxt)
+ call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
+ unreachable
+}
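The padding fields in the CHECK'd frame type fall out of the offsetToAlignment
computation added to FrameTypeBuilder above: with the 16-byte maximum frame
alignment used in this test, the align-32 alloca gets a 16-byte buffer
([16 x i8]) and the align-64 alloca a 48-byte buffer ([48 x i8]). A small
sketch of that arithmetic, using a hypothetical stand-in offsetToNextAligned
for llvm::offsetToAlignment:

#include <cassert>
#include <cstdint>

// Bytes needed to advance Value to the next multiple of Align (a power of
// two); stand-in for llvm::offsetToAlignment used by the patch.
static uint64_t offsetToNextAligned(uint64_t Value, uint64_t Align) {
  return (Align - (Value % Align)) % Align;
}

int main() {
  const uint64_t MaxFrameAlign = 16; // max frame alignment in this test
  // %tmp3 (align 32) needs a 16-byte dynamic-align buffer -> [16 x i8] field.
  assert(offsetToNextAligned(MaxFrameAlign, 32) == 16);
  // %tmp4 (align 64) needs a 48-byte dynamic-align buffer -> [48 x i8] field.
  assert(offsetToNextAligned(MaxFrameAlign, 64) == 48);
  return 0;
}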