[llvm] 58e6ee0 - llvm.swift.async.context.addr cannot be modeled as NoMem because we don't want it to be CSE'd across async suspends

Arnold Schwaighofer via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 22 11:51:44 PDT 2022


Author: Arnold Schwaighofer
Date: 2022-07-22T11:50:58-07:00
New Revision: 58e6ee0e1f1668ff9dd93e4a4084e36bc79fdf88

URL: https://github.com/llvm/llvm-project/commit/58e6ee0e1f1668ff9dd93e4a4084e36bc79fdf88
DIFF: https://github.com/llvm/llvm-project/commit/58e6ee0e1f1668ff9dd93e4a4084e36bc79fdf88.diff

LOG: llvm.swift.async.context.addr cannot be modeled as NoMem because we don't want it to be CSE'd across async suspends

An async suspend models the split between two partial async functions.
`llvm.swift.async.context.addr` will have a different value in the two
partial functions, so it is not correct in general to CSE the call.

rdar://97336162

Differential Revision: https://reviews.llvm.org/D130201
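
For illustration, here is a condensed, hypothetical IR sketch of the
hazard (the value names are illustrative; the added test below shows the
full pattern): the two calls bracket a suspend point, and coroutine
splitting places them in different partial functions, so folding the
second call into the first would hand partial function 2 the frame
address of partial function 1.

    %ctx1 = call i8** @llvm.swift.async.context.addr() ; frame of partial function 1
    ; ... llvm.coro.suspend.async ...                  ; CoroSplit cuts the function here
    %ctx2 = call i8** @llvm.swift.async.context.addr() ; frame of partial function 2
    ; With IntrNoMem, passes such as EarlyCSE or GVN were free to replace
    ; %ctx2 with %ctx1, which is wrong once the suspend separates the frames.

Dropping IntrNoMem also makes the intrinsic carry a chain in
SelectionDAG, which is why the AArch64 and X86 changes below move its
handling from the INTRINSIC_WO_CHAIN path to the INTRINSIC_W_CHAIN path.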

Added: 
    llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll

Modified: 
    llvm/include/llvm/IR/Intrinsics.td
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index cb14c08a5b568..c523e3773de42 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -495,7 +495,7 @@ def int_objc_arc_annotation_bottomup_bbend  : Intrinsic<[],
 // Returns the location of the Swift asynchronous context (usually stored just
 // before the frame pointer), and triggers the creation of a null context if it
 // would otherwise be unneeded.
-def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], [IntrNoMem]>;
+def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], []>;
 
 //===--------------------- Code Generator Intrinsics ----------------------===//
 //

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 5fbb8e5f81dfe..00621b84d2f23 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4074,6 +4074,24 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       }
       break;
     }
+    case Intrinsic::swift_async_context_addr: {
+      SDLoc DL(Node);
+      SDValue Chain = Node->getOperand(0);
+      SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
+      SDValue Res = SDValue(
+          CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
+                                 CurDAG->getTargetConstant(8, DL, MVT::i32),
+                                 CurDAG->getTargetConstant(0, DL, MVT::i32)),
+          0);
+      ReplaceUses(SDValue(Node, 0), Res);
+      ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
+      CurDAG->RemoveDeadNode(Node);
+
+      auto &MF = CurDAG->getMachineFunction();
+      MF.getFrameInfo().setFrameAddressIsTaken(true);
+      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
+      return;
+    }
     }
   } break;
   case ISD::INTRINSIC_WO_CHAIN: {
@@ -4119,18 +4137,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       if (tryMULLV64LaneV128(IntNo, Node))
         return;
       break;
-    case Intrinsic::swift_async_context_addr: {
-      SDLoc DL(Node);
-      CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64,
-                           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
-                                                  AArch64::FP, MVT::i64),
-                           CurDAG->getTargetConstant(8, DL, MVT::i32),
-                           CurDAG->getTargetConstant(0, DL, MVT::i32));
-      auto &MF = CurDAG->getMachineFunction();
-      MF.getFrameInfo().setFrameAddressIsTaken(true);
-      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
-      return;
-    }
     }
     break;
   }

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a54e899a1cc2c..7805493c86fc7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27318,27 +27318,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
-  case Intrinsic::swift_async_context_addr: {
-    auto &MF = DAG.getMachineFunction();
-    auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
-    if (Subtarget.is64Bit()) {
-      MF.getFrameInfo().setFrameAddressIsTaken(true);
-      X86FI->setHasSwiftAsyncContext(true);
-      return SDValue(
-          DAG.getMachineNode(
-              X86::SUB64ri8, dl, MVT::i64,
-              DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
-              DAG.getTargetConstant(8, dl, MVT::i32)),
-          0);
-    } else {
-      // 32-bit so no special extended frame, create or reuse an existing stack
-      // slot.
-      if (!X86FI->getSwiftAsyncContextFrameIdx())
-        X86FI->setSwiftAsyncContextFrameIdx(
-            MF.getFrameInfo().CreateStackObject(4, Align(4), false));
-      return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
-    }
-  }
   case Intrinsic::x86_avx512_vp2intersect_q_512:
   case Intrinsic::x86_avx512_vp2intersect_q_256:
   case Intrinsic::x86_avx512_vp2intersect_q_128:
@@ -27718,6 +27697,37 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
   const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
   if (!IntrData) {
     switch (IntNo) {
+
+    case Intrinsic::swift_async_context_addr: {
+      SDLoc dl(Op);
+      auto &MF = DAG.getMachineFunction();
+      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      if (Subtarget.is64Bit()) {
+        MF.getFrameInfo().setFrameAddressIsTaken(true);
+        X86FI->setHasSwiftAsyncContext(true);
+        SDValue Chain = Op->getOperand(0);
+        SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
+        SDValue Result =
+            SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP,
+                                       DAG.getTargetConstant(8, dl, MVT::i32)),
+                    0);
+        // Return { result, chain }.
+        return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+                           CopyRBP.getValue(1));
+      } else {
+        // 32-bit so no special extended frame, create or reuse an existing
+        // stack slot.
+        if (!X86FI->getSwiftAsyncContextFrameIdx())
+          X86FI->setSwiftAsyncContextFrameIdx(
+              MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+        SDValue Result =
+            DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+        // Return { result, chain }.
+        return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+                           Op->getOperand(0));
+      }
+    }
+
     case llvm::Intrinsic::x86_seh_ehregnode:
       return MarkEHRegistrationNode(Op, DAG);
     case llvm::Intrinsic::x86_seh_ehguard:

diff --git a/llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll b/llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll
new file mode 100644
index 0000000000000..c5e9ac2200c6c
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
+target datalayout = "p:64:64:64"
+
+%swift.async_func_pointer = type <{ i32, i32 }>
+%swift.context = type { %swift.context*, void (%swift.context*)* }
+
+@repoTU = global %swift.async_func_pointer <{ i32 trunc (i64 sub (i64 ptrtoint (void (%swift.context*)* @repo to i64), i64 ptrtoint (%swift.async_func_pointer* @repoTU to i64)) to i32), i32 16 }>, align 8
+
+declare swifttailcc void @callee.0(%swift.context* swiftasync, i8*, i64, i64)
+
+define internal swifttailcc void @callee(i8* %0, i64 %1, i64 %2, %swift.context* %3) {
+entry:
+  musttail call swifttailcc void @callee.0(%swift.context* swiftasync %3, i8* %0, i64 %1, i64 %2)
+  ret void
+}
+
+define swifttailcc void @repo(%swift.context* swiftasync %0) {
+entry:
+  %1 = alloca %swift.context*, align 8
+  %2 = bitcast %swift.context* %0 to <{ %swift.context*, void (%swift.context*)* }>*
+  %3 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, i8* bitcast (%swift.async_func_pointer* @repoTU to i8*))
+  %4 = call i8* @llvm.coro.begin(token %3, i8* null)
+  store %swift.context* %0, %swift.context** %1, align 8
+
+  ; This context.addr is the address in the frame of the first partial function after splitting.
+  %5 = call i8** @llvm.swift.async.context.addr()
+  store i8* null, i8** %5, align 8
+
+  %6 = call i8* @llvm.coro.async.resume()
+  %7 = call { i8* } (i32, i8*, i8*, ...) @llvm.coro.suspend.async.sl_p0i8s(i32 0,
+                                                                           i8* %6,
+                                                                           i8* bitcast (i8* (i8*)* @__swift_async_resume_get_context to i8*),
+                                                                           i8* bitcast (void (i8*, i64, i64, %swift.context*)* @callee to i8*),
+                                                                           i8* %6, i64 0, i64 0, %swift.context* %0)
+  %8 = load %swift.context*, %swift.context** %1, align 8
+  %9 = bitcast %swift.context* %8 to <{ %swift.context*, void (%swift.context*)* }>*
+  %10 = getelementptr inbounds <{ %swift.context*, void (%swift.context*)* }>, <{ %swift.context*, void (%swift.context*)* }>* %9, i32 0, i32 1
+  %11 = load void (%swift.context*)*, void (%swift.context*)** %10, align 8
+  %12 = load %swift.context*, %swift.context** %1, align 8
+  %13 = bitcast void (%swift.context*)* %11 to i8*
+
+  ; This context.addr is the address in the frame of the second partial function after splitting.
+  ; It is not valid to CSE it with the previous call.
+  %14 = call i8** @llvm.swift.async.context.addr()
+  store i8* %13, i8** %14, align 8
+
+  %15 = call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %4, i1 false, void (i8*, %swift.context*)* @repo.0, i8* %13, %swift.context* %12)
+  unreachable
+}
+
+; Make sure we don't CSE the llvm.swift.async.context.addr calls
+; CHECK: define swifttailcc void @repo
+; CHECK: call i8** @llvm.swift.async.context.addr()
+
+; CHECK: define {{.*}}swifttailcc void @repoTY0_
+; CHECK: call i8** @llvm.swift.async.context.addr()
+
+define internal swifttailcc void @repo.0(i8* %0, %swift.context* %1) #1 {
+entry:
+  %2 = bitcast i8* %0 to void (%swift.context*)*
+  musttail call swifttailcc void %2(%swift.context* swiftasync %1)
+  ret void
+}
+
+define linkonce_odr hidden i8* @__swift_async_resume_get_context(i8* %0) #1 {
+entry:
+  ret i8* %0
+}
+
+declare { i8* } @llvm.coro.suspend.async.sl_p0i8s(i32, i8*, i8*, ...) #1
+declare token @llvm.coro.id.async(i32, i32, i32, i8*) #1
+declare i8* @llvm.coro.begin(token, i8* writeonly) #1
+declare i1 @llvm.coro.end.async(i8*, i1, ...) #1
+declare i8* @llvm.coro.async.resume() #1
+declare i8** @llvm.swift.async.context.addr() #1
+
+attributes #1 = { nounwind }
