[clang] [llvm] [GlobalOpt] Add TTI interface useFastCCForInternalCall for FASTCC (PR #164768)
Phoebe Wang via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 30 21:06:34 PDT 2025
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/164768
>From 1c967de5abf29268c22e5239aab03196446310d0 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 23 Oct 2025 15:37:07 +0800
Subject: [PATCH 1/5] [GlobalOpt] Add TTI interface useFastCCForInternalCall
for FASTCC
Background: The X86 APX feature adds 16 registers within the same 64-bit
mode. PR #164638 is trying to use these additional registers for FASTCC.
However, a blocking issue is that a calling convention must not change
depending on whether a feature is enabled.
The solution is to disable FASTCC if APX is not ready. This is an NFC
change to the final code generation, because X86 doesn't define an
alternative ABI for FASTCC in 64-bit mode. We can solve the potential
compatibility issue of #164638 with this patch.
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 4 ++++
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++++
llvm/lib/Target/X86/X86TargetTransformInfo.h | 4 ++++
llvm/lib/Transforms/IPO/GlobalOpt.cpp | 15 +++++++++------
.../GlobalOpt/null-check-is-use-pr35760.ll | 2 +-
.../GlobalOpt/null-check-not-use-pr35760.ll | 2 +-
llvm/test/tools/gold/X86/merge-functions.ll | 4 ++--
llvm/test/tools/gold/X86/unified-lto.ll | 4 ++--
9 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 5d3b233ed6b6a..f52fb448fc584 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -943,6 +943,10 @@ class TargetTransformInfo {
/// should use coldcc calling convention.
LLVM_ABI bool useColdCCForColdCall(Function &F) const;
+ /// Return true if the input function is internal, should use fastcc calling
+ /// convention.
+ LLVM_ABI bool useFastCCForInternalCall(Function &F) const;
+
LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
/// Identifies if the vector form of the intrinsic has a scalar operand.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4cd607c0d0c8d..064e28c504af4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -431,6 +431,8 @@ class TargetTransformInfoImplBase {
virtual bool useColdCCForColdCall(Function &F) const { return false; }
+ virtual bool useFastCCForInternalCall(Function &F) const { return true; }
+
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
return false;
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index bf62623099a97..dd65d8375828c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -609,6 +609,10 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
return TTIImpl->useColdCCForColdCall(F);
}
+bool TargetTransformInfo::useFastCCForInternalCall(Function &F) const {
+ return TTIImpl->useFastCCForInternalCall(F);
+}
+
bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
Intrinsic::ID ID) const {
return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 133b3668a46c8..609861a53a0a0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -319,6 +319,10 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const override;
+ bool useFastCCForInternalCall(Function &F) const override {
+ return !ST->is64Bit() || ST->hasEGPR();
+ }
+
private:
bool supportsGather() const;
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 99c4982c58b47..1516a5bb7a6c2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2018,12 +2018,15 @@ OptimizeFunctions(Module &M,
if (hasChangeableCC(&F, ChangeableCCCache)) {
// If this function has a calling convention worth changing, is not a
- // varargs function, and is only called directly, promote it to use the
- // Fast calling convention.
- F.setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(&F);
- ++NumFastCallFns;
- Changed = true;
+ // varargs function, is only called directly, and is supported by the
+ // target, promote it to use the Fast calling convention.
+ TargetTransformInfo &TTI = GetTTI(F);
+ if (TTI.useFastCCForInternalCall(F)) {
+ F.setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(&F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
}
if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
diff --git a/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll b/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
index 70923c547940c..4a0c93f09c7df 100644
--- a/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
+++ b/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
@@ -12,7 +12,7 @@ define dso_local i32 @main() {
; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: store ptr null, ptr @_ZL3g_i, align 8
-; CHECK-NEXT: call fastcc void @_ZL13PutsSomethingv()
+; CHECK-NEXT: call void @_ZL13PutsSomethingv()
; CHECK-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll b/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
index a499fe1e4ad92..2b92d856d1848 100644
--- a/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
+++ b/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
@@ -15,7 +15,7 @@ define dso_local i32 @main() {
; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: store ptr null, ptr @_ZL3g_i, align 8
-; CHECK-NEXT: call fastcc void @_ZL13PutsSomethingv()
+; CHECK-NEXT: call void @_ZL13PutsSomethingv()
; CHECK-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/tools/gold/X86/merge-functions.ll b/llvm/test/tools/gold/X86/merge-functions.ll
index d4a49b1c40b47..296e7aa3f76f7 100644
--- a/llvm/test/tools/gold/X86/merge-functions.ll
+++ b/llvm/test/tools/gold/X86/merge-functions.ll
@@ -11,8 +11,8 @@
; Check that we've merged foo and bar
; CHECK: define dso_local noundef i32 @main()
-; CHECK-NEXT: tail call fastcc void @bar()
-; CHECK-NEXT: tail call fastcc void @bar()
+; CHECK-NEXT: tail call void @bar()
+; CHECK-NEXT: tail call void @bar()
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
diff --git a/llvm/test/tools/gold/X86/unified-lto.ll b/llvm/test/tools/gold/X86/unified-lto.ll
index e5030e863a64a..24eb94a08de39 100644
--- a/llvm/test/tools/gold/X86/unified-lto.ll
+++ b/llvm/test/tools/gold/X86/unified-lto.ll
@@ -25,10 +25,10 @@
; Constant propagation is not supported by thin LTO.
; With full LTO we fold argument into constant 43
; CHECK: define dso_local noundef i32 @main()
-; CHECK-NEXT: tail call fastcc void @foo()
+; CHECK-NEXT: tail call void @foo()
; CHECK-NEXT: ret i32 43
-; CHECK: define internal fastcc void @foo()
+; CHECK: define internal void @foo()
; CHECK-NEXT: store i32 43, ptr @_g, align 4
; ThinLTO doesn't import foo, because the latter has noinline attribute
>From 03f0ed9afc66be07dbf69b3c2630a3e2d9848835 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 23 Oct 2025 19:20:33 +0800
Subject: [PATCH 2/5] Check all call sites' containing function too
---
llvm/lib/Transforms/IPO/GlobalOpt.cpp | 23 +++++++++-
llvm/test/Transforms/GlobalOpt/X86/apx.ll | 54 +++++++++++++++++++++++
2 files changed, 75 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/GlobalOpt/X86/apx.ll
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1516a5bb7a6c2..7ae4c05f25332 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1788,6 +1788,26 @@ static void changeCallSitesToColdCC(Function *F) {
Call->setCallingConv(CallingConv::Cold);
}
+// This function checks if the input function F and its all call sites'
+// containing function allow to use fastcc, returning false if any of them don't
+// allow it.
+static bool useFastCCForInternalCall(
+ Function &F, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+ if (!GetTTI(F).useFastCCForInternalCall(F))
+ return false;
+
+ for (User *U : F.users()) {
+ CallBase *CB = dyn_cast<CallBase>(U);
+ if (!CB || CB->getCalledOperand() != &F)
+ continue;
+ Function *CallerFunc = CB->getParent()->getParent();
+ if (!GetTTI(*CallerFunc).useFastCCForInternalCall(F))
+ return false;
+ }
+
+ return true;
+}
+
// This function iterates over all the call instructions in the input Function
// and checks that all call sites are in cold blocks and are allowed to use the
// coldcc calling convention.
@@ -2020,8 +2040,7 @@ OptimizeFunctions(Module &M,
// If this function has a calling convention worth changing, is not a
// varargs function, is only called directly, and is supported by the
// target, promote it to use the Fast calling convention.
- TargetTransformInfo &TTI = GetTTI(F);
- if (TTI.useFastCCForInternalCall(F)) {
+ if (useFastCCForInternalCall(F, GetTTI)) {
F.setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(&F);
++NumFastCallFns;
diff --git a/llvm/test/Transforms/GlobalOpt/X86/apx.ll b/llvm/test/Transforms/GlobalOpt/X86/apx.ll
new file mode 100644
index 0000000000000..aaf6abac966e8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/X86/apx.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=x86_64 -S -passes=globalopt -o - < %s | FileCheck %s
+
+define void @caller1() {
+; CHECK-LABEL: define void @caller1() local_unnamed_addr {
+; CHECK-NEXT: call void @callee1()
+; CHECK-NEXT: ret void
+;
+ call void @callee1()
+ ret void
+}
+
+define internal void @callee1() "target-features"="+egpr" {
+; CHECK-LABEL: define internal void @callee1(
+; CHECK-SAME: ) unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+define void @caller2() "target-features"="+egpr" {
+; CHECK-LABEL: define void @caller2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: call void @callee2()
+; CHECK-NEXT: ret void
+;
+ call void @callee2()
+ ret void
+}
+
+define internal void @callee2() {
+; CHECK-LABEL: define internal void @callee2() unnamed_addr {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+define void @caller3() "target-features"="+egpr" {
+; CHECK-LABEL: define void @caller3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: call fastcc void @callee3()
+; CHECK-NEXT: ret void
+;
+ call void @callee3()
+ ret void
+}
+
+define internal void @callee3() "target-features"="+egpr" {
+; CHECK-LABEL: define internal fastcc void @callee3(
+; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
>From 175315f3d7390b278c4119c676de9b3e4423ba06 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 23 Oct 2025 22:39:46 +0800
Subject: [PATCH 3/5] Fix a lit failure
---
clang/test/CodeGenObjCXX/nrvo.mm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm
index 802dd59aa87b7..c4af2114e2d18 100644
--- a/clang/test/CodeGenObjCXX/nrvo.mm
+++ b/clang/test/CodeGenObjCXX/nrvo.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -o - -fblocks %s -O1 -fno-inline-functions -triple x86_64-apple-darwin10.0.0 -fobjc-runtime=macosx-fragile-10.5 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -fblocks %s -O1 -fno-inline-functions -triple x86_64-apple-darwin10.0.0 -target-feature +egpr -fobjc-runtime=macosx-fragile-10.5 | FileCheck %s
// PR10835
struct X {
>From c95843e4acb6288e0ddac48fdfc9883d858fae03 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Sat, 25 Oct 2025 21:28:16 +0800
Subject: [PATCH 4/5] Move caller check logic to target code
---
clang/test/CodeGenObjCXX/nrvo.mm | 2 +-
.../lib/Target/X86/X86TargetTransformInfo.cpp | 16 +++++++++++++
llvm/lib/Target/X86/X86TargetTransformInfo.h | 4 +---
llvm/lib/Transforms/IPO/GlobalOpt.cpp | 23 ++-----------------
.../GlobalOpt/null-check-is-use-pr35760.ll | 2 +-
.../GlobalOpt/null-check-not-use-pr35760.ll | 2 +-
llvm/test/tools/gold/X86/merge-functions.ll | 4 ++--
llvm/test/tools/gold/X86/unified-lto.ll | 4 ++--
8 files changed, 26 insertions(+), 31 deletions(-)
diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm
index c4af2114e2d18..802dd59aa87b7 100644
--- a/clang/test/CodeGenObjCXX/nrvo.mm
+++ b/clang/test/CodeGenObjCXX/nrvo.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -o - -fblocks %s -O1 -fno-inline-functions -triple x86_64-apple-darwin10.0.0 -target-feature +egpr -fobjc-runtime=macosx-fragile-10.5 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -fblocks %s -O1 -fno-inline-functions -triple x86_64-apple-darwin10.0.0 -fobjc-runtime=macosx-fragile-10.5 | FileCheck %s
// PR10835
struct X {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3d8d0a236a3c1..ad0f077d25377 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7223,3 +7223,19 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
return false;
}
+
+bool X86TTIImpl::useFastCCForInternalCall(Function &F) const {
+ bool HasEGPR = ST->hasEGPR();
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ for (User *U : F.users()) {
+ CallBase *CB = dyn_cast<CallBase>(U);
+ if (!CB || CB->getCalledOperand() != &F)
+ continue;
+ Function *CallerFunc = CB->getParent()->getParent();
+ if (TM.getSubtarget<X86Subtarget>(*CallerFunc).hasEGPR() != HasEGPR)
+ return false;
+ }
+
+ return true;
+}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 609861a53a0a0..32d5e301a59cc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -319,9 +319,7 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const override;
- bool useFastCCForInternalCall(Function &F) const override {
- return !ST->is64Bit() || ST->hasEGPR();
- }
+ bool useFastCCForInternalCall(Function &F) const override;
private:
bool supportsGather() const;
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 7ae4c05f25332..1516a5bb7a6c2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1788,26 +1788,6 @@ static void changeCallSitesToColdCC(Function *F) {
Call->setCallingConv(CallingConv::Cold);
}
-// This function checks if the input function F and its all call sites'
-// containing function allow to use fastcc, returning false if any of them don't
-// allow it.
-static bool useFastCCForInternalCall(
- Function &F, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
- if (!GetTTI(F).useFastCCForInternalCall(F))
- return false;
-
- for (User *U : F.users()) {
- CallBase *CB = dyn_cast<CallBase>(U);
- if (!CB || CB->getCalledOperand() != &F)
- continue;
- Function *CallerFunc = CB->getParent()->getParent();
- if (!GetTTI(*CallerFunc).useFastCCForInternalCall(F))
- return false;
- }
-
- return true;
-}
-
// This function iterates over all the call instructions in the input Function
// and checks that all call sites are in cold blocks and are allowed to use the
// coldcc calling convention.
@@ -2040,7 +2020,8 @@ OptimizeFunctions(Module &M,
// If this function has a calling convention worth changing, is not a
// varargs function, is only called directly, and is supported by the
// target, promote it to use the Fast calling convention.
- if (useFastCCForInternalCall(F, GetTTI)) {
+ TargetTransformInfo &TTI = GetTTI(F);
+ if (TTI.useFastCCForInternalCall(F)) {
F.setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(&F);
++NumFastCallFns;
diff --git a/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll b/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
index 4a0c93f09c7df..70923c547940c 100644
--- a/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
+++ b/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
@@ -12,7 +12,7 @@ define dso_local i32 @main() {
; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: store ptr null, ptr @_ZL3g_i, align 8
-; CHECK-NEXT: call void @_ZL13PutsSomethingv()
+; CHECK-NEXT: call fastcc void @_ZL13PutsSomethingv()
; CHECK-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll b/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
index 2b92d856d1848..a499fe1e4ad92 100644
--- a/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
+++ b/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
@@ -15,7 +15,7 @@ define dso_local i32 @main() {
; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: store ptr null, ptr @_ZL3g_i, align 8
-; CHECK-NEXT: call void @_ZL13PutsSomethingv()
+; CHECK-NEXT: call fastcc void @_ZL13PutsSomethingv()
; CHECK-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/tools/gold/X86/merge-functions.ll b/llvm/test/tools/gold/X86/merge-functions.ll
index 296e7aa3f76f7..d4a49b1c40b47 100644
--- a/llvm/test/tools/gold/X86/merge-functions.ll
+++ b/llvm/test/tools/gold/X86/merge-functions.ll
@@ -11,8 +11,8 @@
; Check that we've merged foo and bar
; CHECK: define dso_local noundef i32 @main()
-; CHECK-NEXT: tail call void @bar()
-; CHECK-NEXT: tail call void @bar()
+; CHECK-NEXT: tail call fastcc void @bar()
+; CHECK-NEXT: tail call fastcc void @bar()
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
diff --git a/llvm/test/tools/gold/X86/unified-lto.ll b/llvm/test/tools/gold/X86/unified-lto.ll
index 24eb94a08de39..e5030e863a64a 100644
--- a/llvm/test/tools/gold/X86/unified-lto.ll
+++ b/llvm/test/tools/gold/X86/unified-lto.ll
@@ -25,10 +25,10 @@
; Constant propagation is not supported by thin LTO.
; With full LTO we fold argument into constant 43
; CHECK: define dso_local noundef i32 @main()
-; CHECK-NEXT: tail call void @foo()
+; CHECK-NEXT: tail call fastcc void @foo()
; CHECK-NEXT: ret i32 43
-; CHECK: define internal void @foo()
+; CHECK: define internal fastcc void @foo()
; CHECK-NEXT: store i32 43, ptr @_g, align 4
; ThinLTO doesn't import foo, because the latter has noinline attribute
>From e72e9a09c1afffd0ff87d2350bde2dfd40257c4f Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Fri, 31 Oct 2025 12:06:09 +0800
Subject: [PATCH 5/5] Document changes to fastcc
---
llvm/docs/LangRef.rst | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 1c6823be44dcb..dca6170cda80d 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -333,11 +333,14 @@ added in the future:
(e.g. by passing things in registers). This calling convention
allows the target to use whatever tricks it wants to produce fast
code for the target, without having to conform to an externally
- specified ABI (Application Binary Interface). `Tail calls can only
- be optimized when this, the tailcc, the GHC or the HiPE convention is
- used. <CodeGenerator.html#tail-call-optimization>`_ This calling
- convention does not support varargs and requires the prototype of all
- callees to exactly match the prototype of the function definition.
+ specified ABI (Application Binary Interface). Targets may use different
+ implementations according to different features. In this case, a
+ TTI interface ``useFastCCForInternalCall`` must return false when
+ any caller functions and the callee belong to different implementations.
+ `Tail calls can only be optimized when this, the tailcc, the GHC or the
+ HiPE convention is used. <CodeGenerator.html#tail-call-optimization>`_
+ This calling convention does not support varargs and requires the prototype
+ of all callees to exactly match the prototype of the function definition.
"``coldcc``" - The cold calling convention
This calling convention attempts to make code in the caller as
efficient as possible under the assumption that the call is not
More information about the cfe-commits
mailing list