[clang] d2d77e0 - [PowerPC][Coroutines] Add tail-call check with call information for coroutines

Ting Wang via cfe-commits cfe-commits at lists.llvm.org
Sun Aug 21 19:25:47 PDT 2022


Author: Ting Wang
Date: 2022-08-21T22:20:40-04:00
New Revision: d2d77e050b32ce3f917688aeeb9e6f8f3c209560

URL: https://github.com/llvm/llvm-project/commit/d2d77e050b32ce3f917688aeeb9e6f8f3c209560
DIFF: https://github.com/llvm/llvm-project/commit/d2d77e050b32ce3f917688aeeb9e6f8f3c209560.diff

LOG: [PowerPC][Coroutines] Add tail-call check with call information for coroutines

Fixes #56679.

Reviewed By: ChuanqiXu, shchenz

Differential Revision: https://reviews.llvm.org/D131953

Added: 
    llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll

Modified: 
    clang/test/CodeGenCoroutines/pr56329.cpp
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenCoroutines/pr56329.cpp b/clang/test/CodeGenCoroutines/pr56329.cpp
index 3918acae0f08f..2e9a1a244e218 100644
--- a/clang/test/CodeGenCoroutines/pr56329.cpp
+++ b/clang/test/CodeGenCoroutines/pr56329.cpp
@@ -1,6 +1,8 @@
 // Test for PR56919. Tests the we won't contain the resumption of final suspend point.
 //
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %s -O3 -S -emit-llvm -o - | FileCheck %s
+// This test is expected to fail on PowerPC.
+// XFAIL: powerpc
 
 #include "Inputs/coroutine.h"
 

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 04db5a9484a76..4dffc3a36a578 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -765,6 +765,9 @@ class TargetTransformInfo {
   /// If the target supports tail calls.
   bool supportsTailCalls() const;
 
+  /// If the target supports a tail call for the call site \p CB.
+  bool supportsTailCallFor(const CallBase *CB) const;
+
   /// Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
@@ -1635,6 +1638,7 @@ class TargetTransformInfo::Concept {
                                    ArrayRef<Type *> Tys) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool supportsTailCalls() = 0;
+  virtual bool supportsTailCallFor(const CallBase *CB) = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
@@ -2109,6 +2113,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   }
 
   bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
+  bool supportsTailCallFor(const CallBase *CB) override {
+    return Impl.supportsTailCallFor(CB);
+  }
 
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 116606973b398..38deff576092c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -343,6 +343,10 @@ class TargetTransformInfoImplBase {
 
   bool supportsTailCalls() const { return true; }
 
+  bool supportsTailCallFor(const CallBase *CB) const {
+    return supportsTailCalls();
+  }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) const {
     return false;
   }

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index f9855ecf3d6e1..c81b8ba6e7857 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -528,6 +528,10 @@ bool TargetTransformInfo::supportsTailCalls() const {
   return TTIImpl->supportsTailCalls();
 }
 
+bool TargetTransformInfo::supportsTailCallFor(const CallBase *CB) const {
+  return TTIImpl->supportsTailCallFor(CB);
+}
+
 bool TargetTransformInfo::enableAggressiveInterleaving(
     bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);

diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index d6a56628d47a4..88a43a582a1fd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1461,3 +1461,19 @@ InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
   // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
   return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
 }
+
+bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
+  // Subtargets using PC-Relative addressing support tail calls.
+  if (ST->isUsingPCRelativeCalls())
+    return true;
+
+  const Function *Callee = CB->getCalledFunction();
+  // Indirect calls and calls to variadic functions are not supported.
+  if (!Callee || Callee->isVarArg())
+    return false;
+
+  const Function *Caller = CB->getCaller();
+  // Supported if the caller and callee can share the TOC base.
+  return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
+                                                     Callee);
+}

diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 3caa66ddf2261..d4e465ca8567f 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -148,6 +148,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
                                     unsigned AddressSpace,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);
+  bool supportsTailCallFor(const CallBase *CB) const;
 
 private:
   // The following constant is used for estimating costs on power9.

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 5efe377f1f938..0107d09059c17 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1362,7 +1362,7 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
 // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
 // This transformation is done only in the resume part of the coroutine that has
 // identical signature and calling convention as the coro.resume call.
-static void addMustTailToCoroResumes(Function &F) {
+static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
   bool changed = false;
 
   // Collect potential resume instructions.
@@ -1374,7 +1374,9 @@ static void addMustTailToCoroResumes(Function &F) {
 
   // Set musttail on those that are followed by a ret instruction.
   for (CallInst *Call : Resumes)
-    if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
+    // Skip calls for which the target does not support a tail call.
+    if (TTI.supportsTailCallFor(Call) &&
+        simplifyTerminatorLeadingToRet(Call->getNextNode())) {
       Call->setTailCallKind(CallInst::TCK_MustTail);
       changed = true;
     }
@@ -1610,7 +1612,7 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
   // FIXME: Could we support symmetric transfer effectively without musttail
   // call?
   if (TTI.supportsTailCalls())
-    addMustTailToCoroResumes(*ResumeClone);
+    addMustTailToCoroResumes(*ResumeClone, TTI);
 
   // Store addresses resume/destroy/cleanup functions in the coroutine frame.
   updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

diff  --git a/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll
new file mode 100644
index 0000000000000..f8d4c36aa9044
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll
@@ -0,0 +1,74 @@
+; Tests that some targets (e.g. ppc) support tail calls only under certain conditions.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN:     -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN:     -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 --code-model=medium \
+; RUN:     | FileCheck %s --check-prefix=CHECK-PCREL
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; Verify that in the initial function resume is not marked with musttail.
+; CHECK-LABEL: @f(
+; CHECK: %[[addr1:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-NEXT: %[[pv1:.+]] = bitcast i8* %[[addr1]] to void (i8*)*
+; CHECK-NOT: musttail call fastcc void %[[pv1]](i8* null)
+
+; Verify that on a ppc target not using PC-Relative addressing, the resume call in the resume part is not marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
+; CHECK-NEXT: call fastcc void %[[pv2]](i8* null)
+
+; Verify that on a ppc target using PC-Relative addressing, the resume call in the resume part is marked with musttail.
+; CHECK-PCREL-LABEL: @f.resume(
+; CHECK-PCREL: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-PCREL-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
+; CHECK-PCREL-NEXT: musttail call fastcc void %[[pv2]](i8* null)
+; CHECK-PCREL-NEXT: ret void
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*) #2
+declare i8* @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
+declare i1 @llvm.coro.end(i8*, i1) #2
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
+declare i8* @malloc(i64)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }


        


More information about the cfe-commits mailing list