[llvm] [GlobalOpt] Add TTI interface useFastCCForInternalCall for FASTCC (PR #164768)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 23 00:58:34 PDT 2025


https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/164768

Background: The X86 APX feature adds 16 registers within the same 64-bit mode. PR #164638 is trying to make these registers available to FASTCC. However, a blocking issue is that a calling convention must not change depending on whether a feature is enabled.

The solution is to disable FASTCC if APX is not ready. This is an NFC change to the final code generation, because X86 doesn't define an alternative ABI for FASTCC in 64-bit mode. We can solve the potential compatibility issue of #164638 with this patch.

>From 1c967de5abf29268c22e5239aab03196446310d0 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 23 Oct 2025 15:37:07 +0800
Subject: [PATCH] [GlobalOpt] Add TTI interface useFastCCForInternalCall for
 FASTCC

Background: The X86 APX feature adds 16 registers within the same 64-bit
mode. PR #164638 is trying to make these registers available to FASTCC.
However, a blocking issue is that a calling convention must not change
depending on whether a feature is enabled.

The solution is to disable FASTCC if APX is not ready. This is an NFC
change to the final code generation, because X86 doesn't define an
alternative ABI for FASTCC in 64-bit mode. We can solve the potential
compatibility issue of #164638 with this patch.
---
 llvm/include/llvm/Analysis/TargetTransformInfo.h  |  4 ++++
 .../llvm/Analysis/TargetTransformInfoImpl.h       |  2 ++
 llvm/lib/Analysis/TargetTransformInfo.cpp         |  4 ++++
 llvm/lib/Target/X86/X86TargetTransformInfo.h      |  4 ++++
 llvm/lib/Transforms/IPO/GlobalOpt.cpp             | 15 +++++++++------
 .../GlobalOpt/null-check-is-use-pr35760.ll        |  2 +-
 .../GlobalOpt/null-check-not-use-pr35760.ll       |  2 +-
 llvm/test/tools/gold/X86/merge-functions.ll       |  4 ++--
 llvm/test/tools/gold/X86/unified-lto.ll           |  4 ++--
 9 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 5d3b233ed6b6a..f52fb448fc584 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -943,6 +943,10 @@ class TargetTransformInfo {
   ///  should use coldcc calling convention.
   LLVM_ABI bool useColdCCForColdCall(Function &F) const;
 
+  /// Return true if the input function, which is internal, should use the
+  /// fastcc calling convention.
+  LLVM_ABI bool useFastCCForInternalCall(Function &F) const;
+
   LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
 
   /// Identifies if the vector form of the intrinsic has a scalar operand.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4cd607c0d0c8d..064e28c504af4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -431,6 +431,8 @@ class TargetTransformInfoImplBase {
 
   virtual bool useColdCCForColdCall(Function &F) const { return false; }
 
+  virtual bool useFastCCForInternalCall(Function &F) const { return true; }
+
   virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
     return false;
   }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index bf62623099a97..dd65d8375828c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -609,6 +609,10 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
   return TTIImpl->useColdCCForColdCall(F);
 }
 
+bool TargetTransformInfo::useFastCCForInternalCall(Function &F) const {
+  return TTIImpl->useFastCCForInternalCall(F);
+}
+
 bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
     Intrinsic::ID ID) const {
   return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 133b3668a46c8..609861a53a0a0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -319,6 +319,10 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                              Type *ScalarValTy) const override;
 
+  bool useFastCCForInternalCall(Function &F) const override {
+    return !ST->is64Bit() || ST->hasEGPR();
+  }
+
 private:
   bool supportsGather() const;
   InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 99c4982c58b47..1516a5bb7a6c2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2018,12 +2018,15 @@ OptimizeFunctions(Module &M,
 
     if (hasChangeableCC(&F, ChangeableCCCache)) {
       // If this function has a calling convention worth changing, is not a
-      // varargs function, and is only called directly, promote it to use the
-      // Fast calling convention.
-      F.setCallingConv(CallingConv::Fast);
-      ChangeCalleesToFastCall(&F);
-      ++NumFastCallFns;
-      Changed = true;
+      // varargs function, is only called directly, and is supported by the
+      // target, promote it to use the Fast calling convention.
+      TargetTransformInfo &TTI = GetTTI(F);
+      if (TTI.useFastCCForInternalCall(F)) {
+        F.setCallingConv(CallingConv::Fast);
+        ChangeCalleesToFastCall(&F);
+        ++NumFastCallFns;
+        Changed = true;
+      }
     }
 
     if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
diff --git a/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll b/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
index 70923c547940c..4a0c93f09c7df 100644
--- a/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
+++ b/llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll
@@ -12,7 +12,7 @@ define dso_local i32 @main() {
 ; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    store ptr null, ptr @_ZL3g_i, align 8
-; CHECK-NEXT:    call fastcc void @_ZL13PutsSomethingv()
+; CHECK-NEXT:    call void @_ZL13PutsSomethingv()
 ; CHECK-NEXT:    ret i32 0
 ;
 bb:
diff --git a/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll b/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
index a499fe1e4ad92..2b92d856d1848 100644
--- a/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
+++ b/llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll
@@ -15,7 +15,7 @@ define dso_local i32 @main() {
 ; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    store ptr null, ptr @_ZL3g_i, align 8
-; CHECK-NEXT:    call fastcc void @_ZL13PutsSomethingv()
+; CHECK-NEXT:    call void @_ZL13PutsSomethingv()
 ; CHECK-NEXT:    ret i32 0
 ;
 bb:
diff --git a/llvm/test/tools/gold/X86/merge-functions.ll b/llvm/test/tools/gold/X86/merge-functions.ll
index d4a49b1c40b47..296e7aa3f76f7 100644
--- a/llvm/test/tools/gold/X86/merge-functions.ll
+++ b/llvm/test/tools/gold/X86/merge-functions.ll
@@ -11,8 +11,8 @@
 
 ; Check that we've merged foo and bar
 ; CHECK:      define dso_local noundef i32 @main()
-; CHECK-NEXT:   tail call fastcc void @bar()
-; CHECK-NEXT:   tail call fastcc void @bar()
+; CHECK-NEXT:   tail call void @bar()
+; CHECK-NEXT:   tail call void @bar()
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux-gnu"
diff --git a/llvm/test/tools/gold/X86/unified-lto.ll b/llvm/test/tools/gold/X86/unified-lto.ll
index e5030e863a64a..24eb94a08de39 100644
--- a/llvm/test/tools/gold/X86/unified-lto.ll
+++ b/llvm/test/tools/gold/X86/unified-lto.ll
@@ -25,10 +25,10 @@
 ; Constant propagation is not supported by thin LTO.
 ; With full LTO we fold argument into constant 43
 ; CHECK:       define dso_local noundef i32 @main()
-; CHECK-NEXT:    tail call fastcc void @foo()
+; CHECK-NEXT:    tail call void @foo()
 ; CHECK-NEXT:    ret i32 43
 
-; CHECK:       define internal fastcc void @foo()
+; CHECK:       define internal void @foo()
 ; CHECK-NEXT:    store i32 43, ptr @_g, align 4
 
 ; ThinLTO doesn't import foo, because the latter has noinline attribute



More information about the llvm-commits mailing list