[llvm] [AArch64][SME] Extend Inliner cost-model with custom penalty for calls. (PR #68416)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 31 02:01:10 PDT 2023


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/68416

>From ed8487adf6c8d511457885ad8fe96b9dde844b1d Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 11 Sep 2023 15:06:30 +0100
Subject: [PATCH 1/4] [AArch64][SME] Extend Inliner cost-model with custom
 penalty for calls.

This patch has two purposes:
(1) It tries to make inlining more likely when it can avoid a streaming-mode change.
(2) It avoids inlining when inlining causes more streaming-mode changes.

An example of (1) is:

  void streaming_compatible_bar(void);

  void foo(void) __arm_streaming {
    /* other code */
    streaming_compatible_bar();
    /* other code */
  }

  void f(void) {
    foo();            // expensive streaming mode change
  }

  ->

  void f(void) {
    /* other code */
    streaming_compatible_bar();
    /* other code */
  }

  where the inliner would not have inlined foo if foo were a non-streaming function.

An example of (2) is:

  void streaming_bar(void) __arm_streaming;

  void foo(void) __arm_streaming {
    streaming_bar();
    streaming_bar();
  }

  void f(void) {
    foo();            // expensive streaming mode change
  }

  -> (do not inline into)

  void f(void) {
    streaming_bar();  // these are now two expensive streaming mode changes
    streaming_bar();
  }
---
 llvm/include/llvm/Analysis/InlineCost.h       |  3 +-
 .../llvm/Analysis/TargetTransformInfo.h       | 16 ++++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  5 +
 llvm/lib/Analysis/InlineCost.cpp              | 15 +--
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  6 ++
 .../AArch64/AArch64TargetTransformInfo.cpp    | 34 +++++++
 .../AArch64/AArch64TargetTransformInfo.h      |  3 +
 llvm/lib/Transforms/IPO/PartialInlining.cpp   |  6 +-
 .../sme-pstatesm-attrs-low-threshold.ll       | 43 +++++++++
 .../Inline/AArch64/sme-pstatesm-attrs.ll      | 95 +++++++++++++++++++
 10 files changed, 216 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll

diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index 57f452853d2d6d6..3f0bb879e021fd7 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -259,7 +259,8 @@ InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);
 
 /// Return the cost associated with a callsite, including parameter passing
 /// and the call/return instruction.
-int getCallsiteCost(const CallBase &Call, const DataLayout &DL);
+int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
+                    const DataLayout &DL);
 
 /// Get an InlineCost object representing the cost of inlining this
 /// callsite.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 5234ef8788d9e96..7a85c03d659232e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1506,6 +1506,15 @@ class TargetTransformInfo {
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
+  /// Returns a penalty for invoking call \p Call in \p F.
+  /// For example, if a function F calls a function G, which in turn calls
+  /// function H, then getInlineCallPenalty(F, H()) would return the
+  /// penalty of calling H from F, e.g. after inlining G into F.
+  /// \p DefaultCallPenalty is passed to give a default penalty that
+  /// the target can amend or override.
+  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
+                                unsigned DefaultCallPenalty) const;
+
   /// \returns True if the caller and callee agree on how \p Types will be
   /// passed to or returned from the callee.
   /// to the callee.
@@ -2001,6 +2010,8 @@ class TargetTransformInfo::Concept {
       std::optional<uint32_t> AtomicCpySize) const = 0;
   virtual bool areInlineCompatible(const Function *Caller,
                                    const Function *Callee) const = 0;
+  virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
+                                        unsigned DefaultCallPenalty) const = 0;
   virtual bool areTypesABICompatible(const Function *Caller,
                                      const Function *Callee,
                                      const ArrayRef<Type *> &Types) const = 0;
@@ -2662,6 +2673,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
                            const Function *Callee) const override {
     return Impl.areInlineCompatible(Caller, Callee);
   }
+  unsigned
+  getInlineCallPenalty(const Function *F, const CallBase &Call,
+                       unsigned DefaultCallPenalty) const override {
+    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
+  }
   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                              const ArrayRef<Type *> &Types) const override {
     return Impl.areTypesABICompatible(Caller, Callee, Types);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e14915443513990..2ccf57c22234f9a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -802,6 +802,11 @@ class TargetTransformInfoImplBase {
             Callee->getFnAttribute("target-features"));
   }
 
+  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
+                                unsigned DefaultCallPenalty) const {
+    return DefaultCallPenalty;
+  }
+
   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                              const ArrayRef<Type *> &Types) const {
     return (Caller->getFnAttribute("target-cpu") ==
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index fa0c30637633df3..7096e06d925adef 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -695,7 +695,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       }
     } else
       // Otherwise simply add the cost for merely making the call.
-      addCost(CallPenalty);
+      addCost(TTI.getInlineCallPenalty(CandidateCall.getCaller(), Call,
+                                       CallPenalty));
   }
 
   void onFinalizeSwitch(unsigned JumpTableSize,
@@ -918,7 +919,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
     // Compute the total savings for the call site.
     auto *CallerBB = CandidateCall.getParent();
     BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
-    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
+    CycleSavings += getCallsiteCost(TTI, this->CandidateCall, DL);
     CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
 
     // Remove the cost of the cold basic blocks to model the runtime cost more
@@ -1076,7 +1077,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
 
     // Give out bonuses for the callsite, as the instructions setting them up
     // will be gone after inlining.
-    addCost(-getCallsiteCost(this->CandidateCall, DL));
+    addCost(-getCallsiteCost(TTI, this->CandidateCall, DL));
 
     // If this function uses the coldcc calling convention, prefer not to inline
     // it.
@@ -1315,7 +1316,7 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
 
   InlineResult onAnalysisStart() override {
     increment(InlineCostFeatureIndex::callsite_cost,
-              -1 * getCallsiteCost(this->CandidateCall, DL));
+              -1 * getCallsiteCost(TTI, this->CandidateCall, DL));
 
     set(InlineCostFeatureIndex::cold_cc_penalty,
         (F.getCallingConv() == CallingConv::Cold));
@@ -2887,7 +2888,8 @@ static bool functionsHaveCompatibleAttributes(
          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
 }
 
-int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
+int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
+                          const DataLayout &DL) {
   int64_t Cost = 0;
   for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
     if (Call.isByValArgument(I)) {
@@ -2917,7 +2919,8 @@ int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
   }
   // The call instruction also disappears after inlining.
   Cost += InstrCost;
-  Cost += CallPenalty;
+  Cost += TTI.getInlineCallPenalty(Call.getCaller(), Call, CallPenalty);
+
   return std::min<int64_t>(Cost, INT_MAX);
 }
 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index aad14f21d114619..10ed3a4437dae5f 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1133,6 +1133,12 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
   return TTIImpl->areInlineCompatible(Caller, Callee);
 }
 
+unsigned TargetTransformInfo::getInlineCallPenalty(
+    const Function *F, const CallBase &Call,
+    unsigned DefaultCallPenalty) const {
+  return TTIImpl->getInlineCallPenalty(F, Call, DefaultCallPenalty);
+}
+
 bool TargetTransformInfo::areTypesABICompatible(
     const Function *Caller, const Function *Callee,
     const ArrayRef<Type *> &Types) const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6bbd7009e2378a0..adc7e37d4525b63 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -269,6 +269,40 @@ bool AArch64TTIImpl::areTypesABICompatible(
   return true;
 }
 
+unsigned
+AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
+                                     unsigned DefaultCallPenalty) const {
+  // This function calculates a penalty for executing Call in F.
+  //
+  // There are two ways this function can be called:
+  // (1)  F:
+  //       call from F -> G (the call here is Call)
+  //
+  // For (1), Call.getCaller() == F, so it will always return a high cost if
+  // a streaming-mode change is required (thus promoting the need to inline the
+  // function)
+  //
+  // (2)  F:
+  //       call from F -> G (the call here is not Call)
+  //      G:
+  //       call from G -> H (the call here is Call)
+  //
+  // For (2), if after inlining the body of G into F the call to H requires a
+  // streaming-mode change, and the call to G from F would also require a
+  // streaming-mode change, then there is benefit to do the streaming-mode
+  // change only once and avoid inlining of G into F.
+  SMEAttrs FAttrs(*F);
+  SMEAttrs CalleeAttrs(Call);
+  if (FAttrs.requiresSMChange(CalleeAttrs)) {
+    if (F == Call.getCaller())                                // (1)
+      return 5 * DefaultCallPenalty;
+    if (FAttrs.requiresSMChange(SMEAttrs(*Call.getCaller()))) // (2)
+      return 10 * DefaultCallPenalty;
+  }
+
+  return DefaultCallPenalty;
+}
+
 bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
     TargetTransformInfo::RegisterKind K) const {
   assert(K != TargetTransformInfo::RGK_Scalar);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c08004ad299fd68..fa4c93d5f77a196 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -80,6 +80,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                              const ArrayRef<Type *> &Types) const;
 
+  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
+                                unsigned DefaultCallPenalty) const;
+
   /// \name Scalar TTI Implementations
   /// @{
 
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 25da06add24f031..aa4f205ec5bdf1e 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -767,7 +767,7 @@ bool PartialInlinerImpl::shouldPartialInline(
   const DataLayout &DL = Caller->getParent()->getDataLayout();
 
   // The savings of eliminating the call:
-  int NonWeightedSavings = getCallsiteCost(CB, DL);
+  int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
   BlockFrequency NormWeightedSavings(NonWeightedSavings);
 
   // Weighted saving is smaller than weighted cost, return false
@@ -842,12 +842,12 @@ PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
     }
 
     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-      InlineCost += getCallsiteCost(*CI, DL);
+      InlineCost += getCallsiteCost(*TTI, *CI, DL);
       continue;
     }
 
     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
-      InlineCost += getCallsiteCost(*II, DL);
+      InlineCost += getCallsiteCost(*TTI, *II, DL);
       continue;
     }
 
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
new file mode 100644
index 000000000000000..f207efaeaad36b8
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline  -inlinedefault-threshold=1 | FileCheck %s
+
+; This test sets the inline-threshold to 1 such that by default the call to @streaming_callee is not inlined.
+; However, if the call to @streaming_callee requires a streaming-mode change, it should always inline the call because the streaming-mode change is more expensive.
+target triple = "aarch64"
+
+declare void @streaming_compatible_f() "aarch64_pstate_sm_compatible"
+
+define void @streaming_callee() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_callee
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    call void @streaming_compatible_f()
+; CHECK-NEXT:    call void @streaming_compatible_f()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_compatible_f()
+  call void @streaming_compatible_f()
+  ret void
+}
+
+; Inline call to @streaming_callee to remove a streaming mode change.
+define void @non_streaming_caller_inline() {
+; CHECK-LABEL: define void @non_streaming_caller_inline
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    call void @streaming_compatible_f()
+; CHECK-NEXT:    call void @streaming_compatible_f()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_callee()
+  ret void
+}
+
+; Don't inline call to @streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change.
+define void @streaming_caller_dont_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_callee()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_callee()
+  ret void
+}
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index f2f5768dbe9c6e9..d6b1f3ef45e7655 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -581,3 +581,98 @@ entry:
   %res = call i64 @normal_callee_call_sme_state()
   ret i64 %res
 }
+
+
+
+declare void @streaming_body() "aarch64_pstate_sm_enabled"
+
+define void @streaming_caller_single_streaming_callee() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_single_streaming_callee
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @streaming_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_body()
+  ret void
+}
+
+define void @streaming_caller_multiple_streaming_callees() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @streaming_body()
+; CHECK-NEXT:    call void @streaming_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_body()
+  call void @streaming_body()
+  ret void
+}
+
+; Allow inlining, as inlining it would not increase the number of streaming-mode changes.
+define void @streaming_caller_single_streaming_callee_inline() {
+; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_caller_single_streaming_callee()
+  ret void
+}
+
+; Prevent inlining, as inlining it would lead to multiple streaming-mode changes.
+define void @streaming_caller_multiple_streaming_callees_dont_inline() {
+; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_caller_multiple_streaming_callees()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_caller_multiple_streaming_callees()
+  ret void
+}
+
+declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible"
+
+define void @streaming_caller_single_streaming_compatible_callee() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_compatible_body()
+  ret void
+}
+
+define void @streaming_caller_multiple_streaming_compatible_callees() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_compatible_body()
+  call void @streaming_compatible_body()
+  ret void
+}
+
+; Allow inlining, as inlining would remove a streaming-mode change.
+define void @streaming_caller_single_streaming_compatible_callee_inline() {
+; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_caller_single_streaming_compatible_callee()
+  ret void
+}
+
+; Allow inlining, as inlining would remove several streaming-mode changes.
+define void @streaming_caller_multiple_streaming_compatible_callees_inline() {
+; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    ret void
+;
+  call void @streaming_caller_multiple_streaming_compatible_callees()
+  ret void
+}

>From b2b29afec3007357bef43a11775a614db3416118 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 30 Oct 2023 10:39:21 +0000
Subject: [PATCH 2/4] Use cl::opt for call penalties

---
 .../Target/AArch64/AArch64TargetTransformInfo.cpp  | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index adc7e37d4525b63..b75cbbc1cbd5381 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -46,6 +46,16 @@ static cl::opt<unsigned>
     NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10),
                                cl::Hidden);
 
+static cl::opt<unsigned> CallPenaltyChangeSM(
+    "call-penalty-sm-change", cl::init(5), cl::Hidden,
+    cl::desc(
+        "Penalty of calling a function that requires a change to PSTATE.SM"));
+
+static cl::opt<unsigned> InlineCallPenaltyChangeSM(
+    "inline-call-penalty-sm-change", cl::init(10), cl::Hidden,
+    cl::desc(
+        "Penalty of inlining a call that requires a change to PSTATE.SM"));
+
 namespace {
 class TailFoldingOption {
   // These bitfields will only ever be set to something non-zero in operator=,
@@ -295,9 +305,9 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
   SMEAttrs CalleeAttrs(Call);
   if (FAttrs.requiresSMChange(CalleeAttrs)) {
     if (F == Call.getCaller())                                // (1)
-      return 5 * DefaultCallPenalty;
+      return CallPenaltyChangeSM * DefaultCallPenalty;
     if (FAttrs.requiresSMChange(SMEAttrs(*Call.getCaller()))) // (2)
-      return 10 * DefaultCallPenalty;
+      return InlineCallPenaltyChangeSM * DefaultCallPenalty;
   }
 
   return DefaultCallPenalty;

>From 3ac137deb9326eddbcb42406560e30b8cd67bf2c Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 31 Oct 2023 08:48:13 +0000
Subject: [PATCH 3/4] Clang-format before committing

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h       | 5 ++---
 llvm/lib/Analysis/TargetTransformInfo.cpp              | 7 ++++---
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 5 ++---
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7a85c03d659232e..4f5f85a2b3c8e56 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -2673,9 +2673,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
                            const Function *Callee) const override {
     return Impl.areInlineCompatible(Caller, Callee);
   }
-  unsigned
-  getInlineCallPenalty(const Function *F, const CallBase &Call,
-                       unsigned DefaultCallPenalty) const override {
+  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
+                                unsigned DefaultCallPenalty) const override {
     return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
   }
   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 10ed3a4437dae5f..caa9b17ae695e49 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1133,9 +1133,10 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
   return TTIImpl->areInlineCompatible(Caller, Callee);
 }
 
-unsigned TargetTransformInfo::getInlineCallPenalty(
-    const Function *F, const CallBase &Call,
-    unsigned DefaultCallPenalty) const {
+unsigned
+TargetTransformInfo::getInlineCallPenalty(const Function *F,
+                                          const CallBase &Call,
+                                          unsigned DefaultCallPenalty) const {
   return TTIImpl->getInlineCallPenalty(F, Call, DefaultCallPenalty);
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b75cbbc1cbd5381..09eb01ed6f27e48 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -53,8 +53,7 @@ static cl::opt<unsigned> CallPenaltyChangeSM(
 
 static cl::opt<unsigned> InlineCallPenaltyChangeSM(
     "inline-call-penalty-sm-change", cl::init(10), cl::Hidden,
-    cl::desc(
-        "Penalty of inlining a call that requires a change to PSTATE.SM"));
+    cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
 
 namespace {
 class TailFoldingOption {
@@ -304,7 +303,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
   SMEAttrs FAttrs(*F);
   SMEAttrs CalleeAttrs(Call);
   if (FAttrs.requiresSMChange(CalleeAttrs)) {
-    if (F == Call.getCaller())                                // (1)
+    if (F == Call.getCaller()) // (1)
       return CallPenaltyChangeSM * DefaultCallPenalty;
     if (FAttrs.requiresSMChange(SMEAttrs(*Call.getCaller()))) // (2)
       return InlineCallPenaltyChangeSM * DefaultCallPenalty;

>From e6e19714f7d3a5aeabf276fa70781206c90d7e51 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 31 Oct 2023 09:00:27 +0000
Subject: [PATCH 4/4] Add comment to test

---
 .../Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll          | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
index f207efaeaad36b8..72003d2fee4bac6 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
@@ -7,6 +7,8 @@ target triple = "aarch64"
 
 declare void @streaming_compatible_f() "aarch64_pstate_sm_compatible"
 
+; Function @streaming_callee doesn't contain any operations that may use ZA
+; state and therefore can be legally inlined into a normal function.
 define void @streaming_callee() "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define void @streaming_callee
 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {



More information about the llvm-commits mailing list