[llvm] r348897 - [coroutines] Improve suspend point simplification

Gor Nishanov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 11 13:23:09 PST 2018


Author: gornishanov
Date: Tue Dec 11 13:23:09 2018
New Revision: 348897

URL: http://llvm.org/viewvc/llvm-project?rev=348897&view=rev
Log:
[coroutines] Improve suspend point simplification

Summary:
Enable suspend point simplification for cases where:
* coro.save and coro.suspend are in different basic blocks
* where there are intervening intrinsics

Reviewers: modocache, tks2103, lewissbaker

Reviewed By: modocache

Subscribers: EricWF, llvm-commits

Differential Revision: https://reviews.llvm.org/D55160

Modified:
    llvm/trunk/lib/Transforms/Coroutines/CoroSplit.cpp
    llvm/trunk/test/Transforms/Coroutines/no-suspend.ll

Modified: llvm/trunk/lib/Transforms/Coroutines/CoroSplit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Coroutines/CoroSplit.cpp?rev=348897&r1=348896&r2=348897&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Coroutines/CoroSplit.cpp (original)
+++ llvm/trunk/lib/Transforms/Coroutines/CoroSplit.cpp Tue Dec 11 13:23:09 2018
@@ -538,43 +538,92 @@ static void handleNoSuspendCoroutine(Cor
   CoroBegin->eraseFromParent();
 }
 
-// look for a very simple pattern
-//    coro.save
-//    no other calls
-//    resume or destroy call
-//    coro.suspend
-//
-// If there are other calls between coro.save and coro.suspend, they can
-// potentially resume or destroy the coroutine, so it is unsafe to eliminate a
-// suspend point.
-static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
-                                 CoroBeginInst *CoroBegin) {
-  auto *Save = Suspend->getCoroSave();
-  auto *BB = Suspend->getParent();
-  if (BB != Save->getParent())
-    return false;
+// SimplifySuspendPoint needs to check that there is no calls between
+// coro_save and coro_suspend, since any of the calls may potentially resume
+// the coroutine and if that is the case we cannot eliminate the suspend point.
+static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
+  for (Instruction *I = From; I != To; I = I->getNextNode()) {
+    // Assume that no intrinsic can resume the coroutine.
+    if (isa<IntrinsicInst>(I))
+      continue;
 
-  CallSite SingleCallSite;
+    if (CallSite CS = CallSite(I))
+      return true;
+  }
+  return false;
+}
 
-  // Check that we have only one CallSite.
-  for (Instruction *I = Save->getNextNode(); I != Suspend;
-       I = I->getNextNode()) {
-    if (isa<CoroFrameInst>(I))
-      continue;
-    if (isa<CoroSubFnInst>(I))
-      continue;
-    if (CallSite CS = CallSite(I)) {
-      if (SingleCallSite)
-        return false;
-      else
-        SingleCallSite = CS;
-    }
+static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
+  SmallPtrSet<BasicBlock *, 8> Set;
+  SmallVector<BasicBlock *, 8> Worklist;
+
+  Set.insert(SaveBB);
+  Worklist.push_back(ResDesBB);
+
+  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
+  // returns a token consumed by suspend instruction, all blocks in between
+  // will have to eventually hit SaveBB when going backwards from ResDesBB.
+  while (!Worklist.empty()) {
+    auto *BB = Worklist.pop_back_val();
+    Set.insert(BB);
+    for (auto *Pred : predecessors(BB))
+      if (Set.count(Pred) == 0)
+        Worklist.push_back(Pred);
   }
-  auto *CallInstr = SingleCallSite.getInstruction();
-  if (!CallInstr)
+
+  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
+  Set.erase(SaveBB);
+  Set.erase(ResDesBB);
+
+  for (auto *BB : Set)
+    if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
+      return true;
+
+  return false;
+}
+
+static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
+  auto *SaveBB = Save->getParent();
+  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
+
+  if (SaveBB == ResumeOrDestroyBB)
+    return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
+
+  // Any calls from Save to the end of the block?
+  if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
+    return true;
+
+  // Any calls from begging of the block up to ResumeOrDestroy?
+  if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
+                             ResumeOrDestroy))
+    return true;
+
+  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
+  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
+    return true;
+
+  return false;
+}
+
+// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
+// suspend point and replace it with nornal control flow.
+static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
+                                 CoroBeginInst *CoroBegin) {
+  Instruction *Prev = Suspend->getPrevNode();
+  if (!Prev) {
+    auto *Pred = Suspend->getParent()->getSinglePredecessor();
+    if (!Pred)
+      return false;
+    Prev = Pred->getTerminator();
+  }
+
+  CallSite CS{Prev};
+  if (!CS)
     return false;
 
-  auto *Callee = SingleCallSite.getCalledValue()->stripPointerCasts();
+  auto *CallInstr = CS.getInstruction();
+
+  auto *Callee = CS.getCalledValue()->stripPointerCasts();
 
   // See if the callsite is for resumption or destruction of the coroutine.
   auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
@@ -585,6 +634,13 @@ static bool simplifySuspendPoint(CoroSus
   if (SubFn->getFrame() != CoroBegin)
     return false;
 
+  // See if the transformation is safe. Specifically, see if there are any
+  // calls in between Save and CallInstr. They can potenitally resume the
+  // coroutine rendering this optimization unsafe.
+  auto *Save = Suspend->getCoroSave();
+  if (hasCallsBetween(Save, CallInstr))
+    return false;
+
   // Replace llvm.coro.suspend with the value that results in resumption over
   // the resume or cleanup path.
   Suspend->replaceAllUsesWith(SubFn->getRawIndex());
@@ -592,8 +648,20 @@ static bool simplifySuspendPoint(CoroSus
   Save->eraseFromParent();
 
   // No longer need a call to coro.resume or coro.destroy.
+  if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
+    BranchInst::Create(Invoke->getNormalDest(), Invoke);
+  }
+
+  // Grab the CalledValue from CS before erasing the CallInstr.
+  auto *CalledValue = CS.getCalledValue();
   CallInstr->eraseFromParent();
 
+  // If no more users remove it. Usually it is a bitcast of SubFn.
+  if (CalledValue != SubFn && CalledValue->user_empty())
+    if (auto *I = dyn_cast<Instruction>(CalledValue))
+      I->eraseFromParent();
+
+  // Now we are good to remove SubFn.
   if (SubFn->user_empty())
     SubFn->eraseFromParent();
 

Modified: llvm/trunk/test/Transforms/Coroutines/no-suspend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/no-suspend.ll?rev=348897&r1=348896&r2=348897&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/no-suspend.ll (original)
+++ llvm/trunk/test/Transforms/Coroutines/no-suspend.ll Tue Dec 11 13:23:09 2018
@@ -1,14 +1,16 @@
 ; Test no suspend coroutines
-; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+; RUN: opt < %s -coro-split -S | FileCheck %s
 
 ; Coroutine with no-suspends will turn into:
 ;
 ; CHECK-LABEL: define void @no_suspends(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
 ; CHECK-NEXT:    call void @print(i32 %n)
 ; CHECK-NEXT:    ret void
 ;
-define void @no_suspends(i32 %n) {
+define void @no_suspends(i32 %n) "coroutine.presplit"="1" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
@@ -37,15 +39,18 @@ suspend:
 }
 
 ; SimplifySuspendPoint will detect that coro.resume resumes itself and will
-; replace suspend with a jump to %resume label turning it into no-suspend 
+; replace suspend with a jump to %resume label turning it into no-suspend
 ; coroutine.
 ;
 ; CHECK-LABEL: define void @simplify_resume(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
+; CHECK-NEXT:    call void @llvm.memcpy
 ; CHECK-NEXT:    call void @print(i32 0)
 ; CHECK-NEXT:    ret void
 ;
-define void @simplify_resume() {
+define void @simplify_resume(i8* %src, i8* %dst) "coroutine.presplit"="1" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
@@ -60,7 +65,11 @@ coro.begin:
   br label %body
 body:
   %save = call token @llvm.coro.save(i8* %hdl)
-  call void @llvm.coro.resume(i8* %hdl)
+  ; memcpy intrinsics should not prevent simplification.
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %bres = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bres(i8* %hdl)
   %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
   switch i8 %0, label %suspend [i8 0, label %resume
                                 i8 1, label %pre.cleanup]
@@ -82,15 +91,17 @@ suspend:
 }
 
 ; SimplifySuspendPoint will detect that coroutine destroys itself and will
-; replace suspend with a jump to %cleanup label turning it into no-suspend 
+; replace suspend with a jump to %cleanup label turning it into no-suspend
 ; coroutine.
 ;
 ; CHECK-LABEL: define void @simplify_destroy(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
 ; CHECK-NEXT:    call void @print(i32 1)
 ; CHECK-NEXT:    ret void
 ;
-define void @simplify_destroy() {
+define void @simplify_destroy() "coroutine.presplit"="1" personality i32 0 {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
@@ -105,7 +116,11 @@ coro.begin:
   br label %body
 body:
   %save = call token @llvm.coro.save(i8* %hdl)
-  call void @llvm.coro.destroy(i8* %hdl)
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  invoke fastcc void %bcast(i8* %hdl) to label %real_susp unwind label %lpad
+
+real_susp:
   %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
   switch i8 %0, label %suspend [i8 0, label %resume
                                 i8 1, label %pre.cleanup]
@@ -124,17 +139,83 @@ cleanup:
 suspend:
   call i1 @llvm.coro.end(i8* %hdl, i1 false)
   ret void
+lpad:
+  %lpval = landingpad { i8*, i32 }
+     cleanup
+
+  call void @print(i32 2)
+  resume { i8*, i32 } %lpval
 }
 
+; SimplifySuspendPoint will detect that coro.resume resumes itself and will
+; replace suspend with a jump to %resume label turning it into no-suspend
+; coroutine.
+;
+; CHECK-LABEL: define void @simplify_resume_with_inlined_if(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
+; CHECK-NEXT:    br i1
+; CHECK:         call void @print(i32 0)
+; CHECK-NEXT:    ret void
+;
+define void @simplify_resume_with_inlined_if(i8* %src, i8* %dst, i1 %cond) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  br i1 %cond, label %if.then, label %if.else
+if.then:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
+  br label %if.end
+if.else:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %dst, i64 1, i1 false)
+  br label %if.end
+if.end:
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %bres = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bres(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+
+
 ; SimplifySuspendPoint won't be able to simplify if it detects that there are
 ; other calls between coro.save and coro.suspend. They potentially can call
 ; resume or destroy, so we should not simplify this suspend point.
 ;
-; CHECK-LABEL: define void @cannot_simplify(
+; CHECK-LABEL: define void @cannot_simplify_other_calls(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call i8* @malloc
+; CHECK-NEXT:     llvm.coro.id
 
-define void @cannot_simplify() {
+define void @cannot_simplify_other_calls() "coroutine.presplit"="1" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
@@ -149,8 +230,117 @@ coro.begin:
   br label %body
 body:
   %save = call token @llvm.coro.save(i8* %hdl)
+  br label %body1
+
+body1:
   call void @foo()
-  call void @llvm.coro.destroy(i8* %hdl)
+  br label %body2
+
+body2:
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bcast(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+; SimplifySuspendPoint won't be able to simplify if it detects that there are
+; other calls between coro.save and coro.suspend. They potentially can call
+; resume or destroy, so we should not simplify this suspend point.
+;
+; CHECK-LABEL: define void @cannot_simplify_calls_in_terminator(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:     llvm.coro.id
+
+define void @cannot_simplify_calls_in_terminator() "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  invoke void @foo() to label %resume_cont unwind label %lpad
+resume_cont:
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bcast(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+lpad:
+  %lpval = landingpad { i8*, i32 }
+     cleanup
+
+  call void @print(i32 2)
+  resume { i8*, i32 } %lpval
+}
+
+; SimplifySuspendPoint won't be able to simplify if it detects that resume or
+; destroy does not immediately preceed coro.suspend.
+;
+; CHECK-LABEL: define void @cannot_simplify_not_last_instr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:     llvm.coro.id
+
+define void @cannot_simplify_not_last_instr(i8* %dst, i8* %src) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bcast(i8* %hdl)
+  ; memcpy separates destory from suspend, therefore cannot simplify.
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
   %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
   switch i8 %0, label %suspend [i8 0, label %resume
                                 i8 1, label %pre.cleanup]
@@ -185,5 +375,6 @@ declare i8 @llvm.coro.suspend(token, i1)
 declare i8* @llvm.coro.free(token, i8*)
 declare i1 @llvm.coro.end(i8*, i1)
 
-declare void @llvm.coro.resume(i8*)
-declare void @llvm.coro.destroy(i8*)
+declare i8* @llvm.coro.subfn.addr(i8*, i8)
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)




More information about the llvm-commits mailing list