[llvm] Fixes inlining issue in armv7 (PR #169337)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 02:24:13 PST 2025
https://github.com/CrooseGit updated https://github.com/llvm/llvm-project/pull/169337
>From ea8e67094ea25c7b1d4367de491afdec4d7d7212 Mon Sep 17 00:00:00 2001
From: reucru01 <reuben.cruise at arm.com>
Date: Thu, 20 Nov 2025 16:10:39 +0000
Subject: [PATCH 1/3] Adds HasV8Ops, FeatureDotProd to InlineAllowed
Fixes issue where functions are not inlined when caller has these
features, but callee does not.
---
llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 100 +++++++++++++------
1 file changed, 68 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 919a6fc9fd0b0..e11bc7298aabf 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -40,13 +40,13 @@ class Type;
class Value;
namespace TailPredication {
- enum Mode {
- Disabled = 0,
- EnabledNoReductions,
- Enabled,
- ForceEnabledNoReductions,
- ForceEnabled
- };
+enum Mode {
+ Disabled = 0,
+ EnabledNoReductions,
+ Enabled,
+ ForceEnabledNoReductions,
+ ForceEnabled
+};
}
// For controlling conversion of memcpy into Tail Predicated loop.
@@ -70,31 +70,67 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
// fail if the callee uses ARM only instructions, e.g. in inline asm.
const FeatureBitset InlineFeaturesAllowed = {
- ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
- ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
- ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
- ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
- ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
- ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
- ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
- ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
- ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
- ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
- ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
- ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
- ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
- ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
- ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
- ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
- ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
- ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
- ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
- ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
- ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
- ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
- ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
- ARM::FeatureNoNegativeImmediates
- };
+ ARM::FeatureDotProd,
+ ARM::HasV8Ops,
+ ARM::FeatureVFP2,
+ ARM::FeatureVFP3,
+ ARM::FeatureNEON,
+ ARM::FeatureThumb2,
+ ARM::FeatureFP16,
+ ARM::FeatureVFP4,
+ ARM::FeatureFPARMv8,
+ ARM::FeatureFullFP16,
+ ARM::FeatureFP16FML,
+ ARM::FeatureHWDivThumb,
+ ARM::FeatureHWDivARM,
+ ARM::FeatureDB,
+ ARM::FeatureV7Clrex,
+ ARM::FeatureAcquireRelease,
+ ARM::FeatureSlowFPBrcc,
+ ARM::FeaturePerfMon,
+ ARM::FeatureTrustZone,
+ ARM::Feature8MSecExt,
+ ARM::FeatureCrypto,
+ ARM::FeatureCRC,
+ ARM::FeatureRAS,
+ ARM::FeatureFPAO,
+ ARM::FeatureFuseAES,
+ ARM::FeatureZCZeroing,
+ ARM::FeatureProfUnpredicate,
+ ARM::FeatureSlowVGETLNi32,
+ ARM::FeatureSlowVDUP32,
+ ARM::FeaturePreferVMOVSR,
+ ARM::FeaturePrefISHSTBarrier,
+ ARM::FeatureMuxedUnits,
+ ARM::FeatureSlowOddRegister,
+ ARM::FeatureSlowLoadDSubreg,
+ ARM::FeatureDontWidenVMOVS,
+ ARM::FeatureExpandMLx,
+ ARM::FeatureHasVMLxHazards,
+ ARM::FeatureNEONForFPMovs,
+ ARM::FeatureNEONForFP,
+ ARM::FeatureCheckVLDnAlign,
+ ARM::FeatureHasSlowFPVMLx,
+ ARM::FeatureHasSlowFPVFMx,
+ ARM::FeatureVMLxForwarding,
+ ARM::FeaturePref32BitThumb,
+ ARM::FeatureAvoidPartialCPSR,
+ ARM::FeatureCheapPredicableCPSR,
+ ARM::FeatureAvoidMOVsShOp,
+ ARM::FeatureHasRetAddrStack,
+ ARM::FeatureHasNoBranchPredictor,
+ ARM::FeatureDSP,
+ ARM::FeatureMP,
+ ARM::FeatureVirtualization,
+ ARM::FeatureMClass,
+ ARM::FeatureRClass,
+ ARM::FeatureAClass,
+ ARM::FeatureStrictAlign,
+ ARM::FeatureLongCalls,
+ ARM::FeatureExecuteOnly,
+ ARM::FeatureReserveR9,
+ ARM::FeatureNoMovt,
+ ARM::FeatureNoNegativeImmediates};
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
>From 0922a15f1c219eb5d11d8d32e0e8007fd36af39a Mon Sep 17 00:00:00 2001
From: reucru01 <reuben.cruise at arm.com>
Date: Tue, 25 Nov 2025 10:06:24 +0000
Subject: [PATCH 2/3] Adds debug statements to ARM areInlineCompatible
This makes it easier to see why a function isn't getting inlined.
---
.../lib/Target/ARM/ARMTargetTransformInfo.cpp | 47 +++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d12b802fe234f..f0d378b66883f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -102,6 +102,53 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
// the callers'.
bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
(CalleeBits & InlineFeaturesAllowed);
+
+ LLVM_DEBUG({
+ dbgs() << "=== Inline compatibility debug ===\n";
+ dbgs() << "Caller: " << Caller->getName() << "\n";
+ dbgs() << "Callee: " << Callee->getName() << "\n";
+
+ // Bit diffs
+ FeatureBitset MissingInCaller = CalleeBits & ~CallerBits; // callee-only
+ FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits; // caller-only
+
+ // Counts
+ dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
+ dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";
+
+ dbgs() << "Only-in-caller feature indices [";
+ {
+ bool First = true;
+ for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) {
+ if (ExtraInCaller.test(I)) {
+ if (!First)
+ dbgs() << ", ";
+ dbgs() << I;
+ First = false;
+ }
+ }
+ }
+ dbgs() << "]\n";
+
+ dbgs() << "Only-in-callee feature indices [";
+ {
+ bool First = true;
+ for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) {
+ if (MissingInCaller.test(I)) {
+ if (!First)
+ dbgs() << ", ";
+ dbgs() << I;
+ First = false;
+ }
+ }
+ }
+ dbgs() << "]\n";
+
+ // Indices map to features as found in
+ // llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc
+ dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
+ << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";
+ });
return MatchExact && MatchSubset;
}
>From c053e0773294fd278ee83976aacbd60478b11a9a Mon Sep 17 00:00:00 2001
From: reucru01 <reuben.cruise at arm.com>
Date: Tue, 25 Nov 2025 10:09:56 +0000
Subject: [PATCH 3/3] Adds BF16 and SB to InlineFeaturesAllowed
These features being present in the caller and not the callee should not prevent a function from being inlined.
---
llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index e11bc7298aabf..7e271785661e9 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -72,6 +72,8 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
const FeatureBitset InlineFeaturesAllowed = {
ARM::FeatureDotProd,
ARM::HasV8Ops,
+ ARM::FeatureSB,
+ ARM::FeatureBF16,
ARM::FeatureVFP2,
ARM::FeatureVFP3,
ARM::FeatureNEON,
More information about the llvm-commits
mailing list