[llvm] 0b5ead6 - [WebAssembly] Don't set musttail for coroutines when tail-call is not

Wed Jun 29 20:19:21 PDT 2022

Author: Chuanqi Xu
Date: 2022-06-30T11:15:40+08:00
New Revision: 0b5ead659023dedd6c450657df9f242a2819871a

URL: https://github.com/llvm/llvm-project/commit/0b5ead659023dedd6c450657df9f242a2819871a
DIFF: https://github.com/llvm/llvm-project/commit/0b5ead659023dedd6c450657df9f242a2819871a.diff

LOG: [WebAssembly] Don't set musttail for coroutines when tail-call is not
enabled

The C++20 Coroutines couldn't be compiled to WebAssembly due to an
optimization named symmetric transfer requires the support for musttail
calls but WebAssembly doesn't support it yet.

This patch tries to fix the problem by adding a supportsTailCalls
method to TargetTransformImpl to skip the symmetric transfer when
tail-call feature is not supported.

Reviewed By: tlively

Differential Revision: https://reviews.llvm.org/D128794

Added: 
    llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail8.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail9.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 9e30cb3c559f3..372f17cfc7ff8 100644

--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -776,6 +776,9 @@ class TargetTransformInfo {
   /// the scalarization cost of a load/store.
   bool supportsEfficientVectorElementLoadStore() const;
 
+  /// If the target supports tail calls.
+  bool supportsTailCalls() const;
+
   /// Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
@@ -1621,6 +1624,7 @@ class TargetTransformInfo::Concept {
   getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                    ArrayRef<Type *> Tys) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
+  virtual bool supportsTailCalls() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
@@ -2083,6 +2087,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
     return Impl.supportsEfficientVectorElementLoadStore();
   }
 
+  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 425cad8c997ef..a70c418974f54 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -340,6 +340,8 @@ class TargetTransformInfoImplBase {
 
   bool supportsEfficientVectorElementLoadStore() const { return false; }
 
+  bool supportsTailCalls() const { return true; }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) const {
     return false;
   }

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 9c59303bde988..66f61961d01b3 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -522,6 +522,10 @@ bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
   return TTIImpl->supportsEfficientVectorElementLoadStore();
 }
 
+bool TargetTransformInfo::supportsTailCalls() const {
+  return TTIImpl->supportsTailCalls();
+}
+
 bool TargetTransformInfo::enableAggressiveInterleaving(
     bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index f1ebcbc6fc513..62f7155e794a2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -139,3 +139,7 @@ void WebAssemblyTTIImpl::getUnrollingPreferences(
   // becomes "fall through" to default value of 2.
   UP.BEInsns = 2;
 }
+
+bool WebAssemblyTTIImpl::supportsTailCalls() const {
+  return getST()->hasTailCall();
+}

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 50036f7f7e987..fde58a9587b69 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -74,6 +74,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
 
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
+
+  bool supportsTailCalls() const;
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index ba85691c6214d..ead552d9be4e2 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -1571,7 +1572,8 @@ static void simplifySuspendPoints(coro::Shape &Shape) {
 }
 
 static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
-                                 SmallVectorImpl<Function *> &Clones) {
+                                 SmallVectorImpl<Function *> &Clones,
+                                 TargetTransformInfo &TTI) {
   assert(Shape.ABI == coro::ABI::Switch);
 
   createResumeEntryBlock(F, Shape);
@@ -1586,7 +1588,13 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
   postSplitCleanup(*DestroyClone);
   postSplitCleanup(*CleanupClone);
 
-  addMustTailToCoroResumes(*ResumeClone);
+  // Adding musttail call to support symmetric transfer.
+  // Skip targets which don't support tail call.
+  //
+  // FIXME: Could we support symmetric transfer effectively without musttail
+  // call?
+  if (TTI.supportsTailCalls())
+    addMustTailToCoroResumes(*ResumeClone);
 
   // Store addresses resume/destroy/cleanup functions in the coroutine frame.
   updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
@@ -1891,6 +1899,7 @@ namespace {
 
 static coro::Shape splitCoroutine(Function &F,
                                   SmallVectorImpl<Function *> &Clones,
+                                  TargetTransformInfo &TTI,
                                   bool OptimizeFrame) {
   PrettyStackTraceFunction prettyStackTrace(F);
 
@@ -1913,7 +1922,7 @@ static coro::Shape splitCoroutine(Function &F,
   } else {
     switch (Shape.ABI) {
     case coro::ABI::Switch:
-      splitSwitchCoroutine(F, Shape, Clones);
+      splitSwitchCoroutine(F, Shape, Clones, TTI);
       break;
     case coro::ABI::Async:
       splitAsyncCoroutine(F, Shape, Clones);
@@ -2090,7 +2099,8 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
     F.setSplittedCoroutine();
 
     SmallVector<Function *, 4> Clones;
-    const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame);
+    const coro::Shape Shape = splitCoroutine(
+        F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame);
     updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
 
     if (!Shape.CoroSuspends.empty()) {

diff  --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
new file mode 100644
index 0000000000000..42d6417808e4b
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -0,0 +1,56 @@
+; Tests that we would convert coro.resume to a musttail call if the target is
+; Wasm64 with tail-call support.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+target triple = "wasm64-unknown-unknown"
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; CHECK: musttail call
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*) #2
+declare i8* @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
+declare i1 @llvm.coro.end(i8*, i1) #2
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
+declare i8* @malloc(i64)
+
+attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
new file mode 100644
index 0000000000000..3f07448c3225b
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
@@ -0,0 +1,56 @@
+; Tests that we would convert coro.resume to a musttail call if the target is
+; Wasm32 with tail-call support.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; CHECK: musttail call
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*) #2
+declare i8* @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
+declare i1 @llvm.coro.end(i8*, i1) #2
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
+declare i8* @malloc(i64)
+
+attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Transforms/Coroutines/coro-split-musttail8.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail8.ll
new file mode 100644
index 0000000000000..1265adbec7bfe
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail8.ll
@@ -0,0 +1,57 @@
+; Tests that we wouldn't convert coro.resume to a musttail call if the target is
+; Wasm32.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; CHECK-NOT: musttail
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*) #2
+declare i8* @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
+declare i1 @llvm.coro.end(i8*, i1) #2
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
+declare i8* @malloc(i64)
+declare void @print()
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Transforms/Coroutines/coro-split-musttail9.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail9.ll
new file mode 100644
index 0000000000000..6466d13c54503
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail9.ll
@@ -0,0 +1,57 @@
+; Tests that we wouldn't convert coro.resume to a musttail call if the target is
+; Wasm64.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+target triple = "wasm64-unknown-unknown"
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; CHECK-NOT: musttail
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*) #2
+declare i8* @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
+declare i1 @llvm.coro.end(i8*, i1) #2
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
+declare i8* @malloc(i64)
+declare void @print()
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }