[llvm] 2937f8d - [coro async] Cap the alignment of spilled values (vs. allocas) at the max frame alignment

Arnold Schwaighofer via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 7 08:07:33 PDT 2021


Author: Arnold Schwaighofer
Date: 2021-07-07T08:06:25-07:00
New Revision: 2937f8d14840f54bb10daf71c7af236f4d897884

URL: https://github.com/llvm/llvm-project/commit/2937f8d14840f54bb10daf71c7af236f4d897884
DIFF: https://github.com/llvm/llvm-project/commit/2937f8d14840f54bb10daf71c7af236f4d897884.diff

LOG: [coro async] Cap the alignment of spilled values (vs.  allocas) at the max frame alignment

Before this patch we would normally use the ABI alignment which can be
to high for the context alginment.

For spilled values we don't need ABI alignment, since the frame entry's
address is not escaped.

rdar://79664965

Differential Revision: https://reviews.llvm.org/D105288

Added: 
    

Modified: 
    llvm/lib/Transforms/Coroutines/CoroFrame.cpp
    llvm/test/Transforms/Coroutines/coro-async.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index aa0a64d0c5d15..73e02b207d547 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -429,12 +429,15 @@ class FrameTypeBuilder {
   Align StructAlign;
   bool IsFinished = false;
 
+  Optional<Align> MaxFrameAlignment;
+
   SmallVector<Field, 8> Fields;
   DenseMap<Value*, unsigned> FieldIndexByKey;
 
 public:
-  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
-      : DL(DL), Context(Context) {}
+  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
+                   Optional<Align> MaxFrameAlignment)
+      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
 
   /// Add a field to this structure for the storage of an `alloca`
   /// instruction.
@@ -485,7 +488,8 @@ class FrameTypeBuilder {
 
   /// Add a field to this structure.
   LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
-                                      bool IsHeader = false) {
+                                      bool IsHeader = false,
+                                      bool IsSpillOfValue = false) {
     assert(!IsFinished && "adding fields to a finished builder");
     assert(Ty && "must provide a type for a field");
 
@@ -500,8 +504,16 @@ class FrameTypeBuilder {
 
     // The field alignment might not be the type alignment, but we need
     // to remember the type alignment anyway to build the type.
-    Align TyAlignment = DL.getABITypeAlign(Ty);
-    if (!FieldAlignment) FieldAlignment = TyAlignment;
+    // If we are spilling values we don't need to worry about ABI alignment
+    // concerns.
+    auto ABIAlign = DL.getABITypeAlign(Ty);
+    Align TyAlignment =
+        (IsSpillOfValue && MaxFrameAlignment)
+            ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
+            : ABIAlign;
+    if (!FieldAlignment) {
+      FieldAlignment = TyAlignment;
+    }
 
     // Lay out header fields immediately.
     uint64_t Offset;
@@ -1089,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
     return StructType::create(C, Name);
   }();
 
-  FrameTypeBuilder B(C, DL);
+  // We will use this value to cap the alignment of spilled values.
+  Optional<Align> MaxFrameAlignment;
+  if (Shape.ABI == coro::ABI::Async)
+    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
+  FrameTypeBuilder B(C, DL, MaxFrameAlignment);
 
   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
   Optional<FieldIDType> SwitchIndexFieldId;
@@ -1142,7 +1158,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
     if (const Argument *A = dyn_cast<Argument>(S.first))
       if (A->hasByValAttr())
         FieldType = FieldType->getPointerElementType();
-    FieldIDType Id = B.addField(FieldType, None);
+    FieldIDType Id =
+        B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);
     FrameData.setFieldIndex(S.first, Id);
   }
 
@@ -1545,6 +1562,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
 
   for (auto const &E : FrameData.Spills) {
     Value *Def = E.first;
+    auto SpillAlignment = Align(FrameData.getAlign(Def));
     // Create a store instruction storing the value into the
     // coroutine frame.
     Instruction *InsertPt = nullptr;
@@ -1601,9 +1619,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
       // instead of the pointer itself.
       auto *Value =
           Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
-      Builder.CreateStore(Value, G);
+      Builder.CreateAlignedStore(Value, G, SpillAlignment);
     } else {
-      Builder.CreateStore(Def, G);
+      Builder.CreateAlignedStore(Def, G, SpillAlignment);
     }
 
     BasicBlock *CurrentBlock = nullptr;
@@ -1621,9 +1639,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
         if (NeedToCopyArgPtrValue)
           CurrentReload = GEP;
         else
-          CurrentReload = Builder.CreateLoad(
+          CurrentReload = Builder.CreateAlignedLoad(
               FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
-              E.first->getName() + Twine(".reload"));
+              SpillAlignment, E.first->getName() + Twine(".reload"));
 
         TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
         for (DbgDeclareInst *DDI : DIs) {

diff  --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll
index 3a8be47223c23..58aaa0efb12e8 100644
--- a/llvm/test/Transforms/Coroutines/coro-async.ll
+++ b/llvm/test/Transforms/Coroutines/coro-async.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -enable-coroutines -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
-; RUN: opt < %s -enable-coroutines -O0 -S
+; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s
 target datalayout = "p:64:64:64"
 
 %async.task = type { i64 }
@@ -64,6 +64,7 @@ entry:
 define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) !dbg !1 {
 entry:
   %tmp = alloca { i64, i64 }, align 8
+  %vector = alloca <4 x double>, align 16
   %proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0
   %proj.2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 1
 
@@ -95,6 +96,7 @@ entry:
   store i8* %async.ctxt, i8** %callee_context.caller_context.addr
   %resume_proj_fun = bitcast i8*(i8*)* @__swift_async_resume_project_context to i8*
   %callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8*
+  %vector_spill = load <4 x double>, <4 x double>* %vector, align 16
   %res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
                                                   i8* %resume.func_ptr,
                                                   i8* %resume_proj_fun,
@@ -108,7 +110,7 @@ entry:
   call void @some_user(i64 %val)
   %val.2 = load i64, i64* %proj.2
   call void @some_user(i64 %val.2)
-
+  store <4 x double> %vector_spill, <4 x double>* %vector, align 16
   tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor)
   call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
   unreachable
@@ -126,6 +128,7 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
 ; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 }
 
 ; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
+; CHECK-O0-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
 ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] {
 ; CHECK: coro.return:
 ; CHECK:   [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128
@@ -151,16 +154,23 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
 ; CHECK:   store i8* bitcast (void (i8*, i8*, i8*)* @my_async_functionTQ0_ to i8*), i8** [[RETURN_TO_CALLER_ADDR]]
 ; CHECK:   [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* [[CALLEE_CTXT]] to i8**
 ; CHECK:   store i8* %async.ctxt, i8** [[CALLER_CONTEXT_ADDR]]
+; Make sure the spill is underaligned to the max context alignment (16).
+; CHECK-O0:   [[VECTOR_SPILL:%.*]] = load <4 x double>, <4 x double>* {{.*}}
+; CHECK-O0:   [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
+; CHECK-O0:   store <4 x double> [[VECTOR_SPILL]], <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
 ; CHECK:   tail call swiftcc void @asyncSuspend(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor)
 ; CHECK:   ret void
 ; CHECK: }
 
 ; CHECK-LABEL: define internal swiftcc void @my_async_functionTQ0_(i8* nocapture readonly swiftasync %0, i8* %1, i8* nocapture readnone %2)
+; CHECK-O0-LABEL: define internal swiftcc void @my_async_functionTQ0_(i8* swiftasync %0, i8* %1, i8* %2)
 ; CHECK-SAME: !dbg ![[SP2:[0-9]+]] {
 ; CHECK: entryresume.0:
 ; CHECK:   [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* %0 to i8**
 ; CHECK:   [[CALLER_CONTEXT:%.*]] = load i8*, i8** [[CALLER_CONTEXT_ADDR]]
 ; CHECK:   [[FRAME_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 128
+; CHECK-O0:   [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
+; CHECK-O0:   load <4 x double>, <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
 ; CHECK:   [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 160
 ; CHECK:   [[CAST1:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8**
 ; CHECK:   [[CALLEE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CAST1]]


        


More information about the llvm-commits mailing list