[llvm] Fixes inlining issue in armv7 (PR #169337)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 25 02:24:13 PST 2025


https://github.com/CrooseGit updated https://github.com/llvm/llvm-project/pull/169337

>From ea8e67094ea25c7b1d4367de491afdec4d7d7212 Mon Sep 17 00:00:00 2001
From: reucru01 <reuben.cruise at arm.com>
Date: Thu, 20 Nov 2025 16:10:39 +0000
Subject: [PATCH 1/3] Adds HasV8Ops, FeatureDotProd to InlineAllowed

Fixes issue where functions are not inlined when caller has these
features, but callee does not.
---
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 100 +++++++++++++------
 1 file changed, 68 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 919a6fc9fd0b0..e11bc7298aabf 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -40,13 +40,13 @@ class Type;
 class Value;
 
 namespace TailPredication {
-  enum Mode {
-    Disabled = 0,
-    EnabledNoReductions,
-    Enabled,
-    ForceEnabledNoReductions,
-    ForceEnabled
-  };
+enum Mode {
+  Disabled = 0,
+  EnabledNoReductions,
+  Enabled,
+  ForceEnabledNoReductions,
+  ForceEnabled
+};
 }
 
 // For controlling conversion of memcpy into Tail Predicated loop.
@@ -70,31 +70,67 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
   // fail if the callee uses ARM only instructions, e.g. in inline asm.
   const FeatureBitset InlineFeaturesAllowed = {
-      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
-      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
-      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
-      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
-      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
-      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
-      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
-      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
-      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
-      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
-      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
-      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
-      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
-      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
-      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
-      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
-      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
-      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
-      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
-      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
-      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
-      ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
-      ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
-      ARM::FeatureNoNegativeImmediates
-  };
+      ARM::FeatureDotProd,
+      ARM::HasV8Ops,
+      ARM::FeatureVFP2,
+      ARM::FeatureVFP3,
+      ARM::FeatureNEON,
+      ARM::FeatureThumb2,
+      ARM::FeatureFP16,
+      ARM::FeatureVFP4,
+      ARM::FeatureFPARMv8,
+      ARM::FeatureFullFP16,
+      ARM::FeatureFP16FML,
+      ARM::FeatureHWDivThumb,
+      ARM::FeatureHWDivARM,
+      ARM::FeatureDB,
+      ARM::FeatureV7Clrex,
+      ARM::FeatureAcquireRelease,
+      ARM::FeatureSlowFPBrcc,
+      ARM::FeaturePerfMon,
+      ARM::FeatureTrustZone,
+      ARM::Feature8MSecExt,
+      ARM::FeatureCrypto,
+      ARM::FeatureCRC,
+      ARM::FeatureRAS,
+      ARM::FeatureFPAO,
+      ARM::FeatureFuseAES,
+      ARM::FeatureZCZeroing,
+      ARM::FeatureProfUnpredicate,
+      ARM::FeatureSlowVGETLNi32,
+      ARM::FeatureSlowVDUP32,
+      ARM::FeaturePreferVMOVSR,
+      ARM::FeaturePrefISHSTBarrier,
+      ARM::FeatureMuxedUnits,
+      ARM::FeatureSlowOddRegister,
+      ARM::FeatureSlowLoadDSubreg,
+      ARM::FeatureDontWidenVMOVS,
+      ARM::FeatureExpandMLx,
+      ARM::FeatureHasVMLxHazards,
+      ARM::FeatureNEONForFPMovs,
+      ARM::FeatureNEONForFP,
+      ARM::FeatureCheckVLDnAlign,
+      ARM::FeatureHasSlowFPVMLx,
+      ARM::FeatureHasSlowFPVFMx,
+      ARM::FeatureVMLxForwarding,
+      ARM::FeaturePref32BitThumb,
+      ARM::FeatureAvoidPartialCPSR,
+      ARM::FeatureCheapPredicableCPSR,
+      ARM::FeatureAvoidMOVsShOp,
+      ARM::FeatureHasRetAddrStack,
+      ARM::FeatureHasNoBranchPredictor,
+      ARM::FeatureDSP,
+      ARM::FeatureMP,
+      ARM::FeatureVirtualization,
+      ARM::FeatureMClass,
+      ARM::FeatureRClass,
+      ARM::FeatureAClass,
+      ARM::FeatureStrictAlign,
+      ARM::FeatureLongCalls,
+      ARM::FeatureExecuteOnly,
+      ARM::FeatureReserveR9,
+      ARM::FeatureNoMovt,
+      ARM::FeatureNoNegativeImmediates};
 
   const ARMSubtarget *getST() const { return ST; }
   const ARMTargetLowering *getTLI() const { return TLI; }

>From 0922a15f1c219eb5d11d8d32e0e8007fd36af39a Mon Sep 17 00:00:00 2001
From: reucru01 <reuben.cruise at arm.com>
Date: Tue, 25 Nov 2025 10:06:24 +0000
Subject: [PATCH 2/3] Adds debug statements to ARM areInlineCompatible

This makes it easier to see why a function isn't getting inlined.
---
 .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d12b802fe234f..f0d378b66883f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -102,6 +102,53 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
   // the callers'.
   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                      (CalleeBits & InlineFeaturesAllowed);
+
+  LLVM_DEBUG({
+    dbgs() << "=== Inline compatibility debug ===\n";
+    dbgs() << "Caller: " << Caller->getName() << "\n";
+    dbgs() << "Callee: " << Callee->getName() << "\n";
+
+    // Bit diffs
+    FeatureBitset MissingInCaller = CalleeBits & ~CallerBits; // callee-only
+    FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits;   // caller-only
+
+    // Counts
+    dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
+    dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";
+
+    dbgs() << "Only-in-caller feature indices [";
+    {
+      bool First = true;
+      for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) {
+        if (ExtraInCaller.test(I)) {
+          if (!First)
+            dbgs() << ", ";
+          dbgs() << I;
+          First = false;
+        }
+      }
+    }
+    dbgs() << "]\n";
+
+    dbgs() << "Only-in-callee feature indices [";
+    {
+      bool First = true;
+      for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) {
+        if (MissingInCaller.test(I)) {
+          if (!First)
+            dbgs() << ", ";
+          dbgs() << I;
+          First = false;
+        }
+      }
+    }
+    dbgs() << "]\n";
+
+    // Indices map to features as found in
+    // llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc
+    dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
+           << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";
+  });
   return MatchExact && MatchSubset;
 }
 

>From c053e0773294fd278ee83976aacbd60478b11a9a Mon Sep 17 00:00:00 2001
From: reucru01 <reuben.cruise at arm.com>
Date: Tue, 25 Nov 2025 10:09:56 +0000
Subject: [PATCH 3/3] Adds BF16 and SB to InlineFeaturesAllowed

These features being present in the caller and not the callee should not prevent a function from being inlined.
---
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index e11bc7298aabf..7e271785661e9 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -72,6 +72,8 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
   const FeatureBitset InlineFeaturesAllowed = {
       ARM::FeatureDotProd,
       ARM::HasV8Ops,
+      ARM::FeatureSB,
+      ARM::FeatureBF16,
       ARM::FeatureVFP2,
       ARM::FeatureVFP3,
       ARM::FeatureNEON,



More information about the llvm-commits mailing list