[llvm-branch-commits] [llvm] release/22.x: [ARM] Fix inlining issue in ARM (#169337) (PR #177974)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 26 07:08:05 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: llvmbot
<details>
<summary>Changes</summary>
Backport fab06fae0064a2f1208331f9c355a26a4f9777f0
Requested by: @nikic
---
Full diff: https://github.com/llvm/llvm-project/pull/177974.diff
3 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+49)
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+132-34)
- (added) llvm/test/Transforms/Inline/ARM/inline-dotprod.ll (+35)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 88a7fb185bf16..b947c8a10e2d8 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -107,6 +107,55 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
// the callers'.
bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
(CalleeBits & InlineFeaturesAllowed);
+
+ LLVM_DEBUG({
+ if (!MatchExact || !MatchSubset) {
+ dbgs() << "=== Inline compatibility debug ===\n";
+ dbgs() << "Caller: " << Caller->getName() << "\n";
+ dbgs() << "Callee: " << Callee->getName() << "\n";
+
+ // Bit diffs
+ FeatureBitset MissingInCaller = CalleeBits & ~CallerBits; // callee-only
+ FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits; // caller-only
+
+ // Counts
+ dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
+ dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";
+
+ dbgs() << "Only-in-caller feature indices [";
+ {
+ bool First = true;
+ for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) {
+ if (ExtraInCaller.test(I)) {
+ if (!First)
+ dbgs() << ", ";
+ dbgs() << I;
+ First = false;
+ }
+ }
+ }
+ dbgs() << "]\n";
+
+ dbgs() << "Only-in-callee feature indices [";
+ {
+ bool First = true;
+ for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) {
+ if (MissingInCaller.test(I)) {
+ if (!First)
+ dbgs() << ", ";
+ dbgs() << I;
+ First = false;
+ }
+ }
+ }
+ dbgs() << "]\n";
+
+ // Indices map to features as found in
+ // llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc
+ dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
+ << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";
+ }
+ });
return MatchExact && MatchSubset;
}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a23256364dd9a..fafd2d44a818c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -40,13 +40,13 @@ class Type;
class Value;
namespace TailPredication {
- enum Mode {
- Disabled = 0,
- EnabledNoReductions,
- Enabled,
- ForceEnabledNoReductions,
- ForceEnabled
- };
+enum Mode {
+ Disabled = 0,
+ EnabledNoReductions,
+ Enabled,
+ ForceEnabledNoReductions,
+ ForceEnabled
+};
}
// For controlling conversion of memcpy into Tail Predicated loop.
@@ -64,37 +64,135 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *TLI;
// Currently the following features are excluded from InlineFeaturesAllowed.
- // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
+ // ModeThumb, FeatureNoARM, ModeSoftFloat.
// Depending on whether they are set or unset, different
// instructions/registers are available. For example, inlining a callee with
// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
// fail if the callee uses ARM only instructions, e.g. in inline asm.
- const FeatureBitset InlineFeaturesAllowed = {
- ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
- ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
- ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
- ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
- ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
- ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
- ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
- ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
- ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
- ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
- ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
- ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
- ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
- ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
- ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
- ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
- ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
- ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
- ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
- ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
- ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
- ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
- ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
- ARM::FeatureNoNegativeImmediates
- };
+ const FeatureBitset InlineFeaturesAllowed = {ARM::Feature8MSecExt,
+ ARM::FeatureAClass,
+ ARM::FeatureAES,
+ ARM::FeatureAcquireRelease,
+ ARM::FeatureAvoidMOVsShOp,
+ ARM::FeatureAvoidMULS,
+ ARM::FeatureAvoidPartialCPSR,
+ ARM::FeatureBF16,
+ ARM::FeatureCRC,
+ ARM::FeatureCheapPredicableCPSR,
+ ARM::FeatureCheckVLDnAlign,
+ ARM::FeatureCrypto,
+ ARM::FeatureD32,
+ ARM::FeatureDB,
+ ARM::FeatureDFB,
+ ARM::FeatureDSP,
+ ARM::FeatureDontWidenVMOVS,
+ ARM::FeatureDotProd,
+ ARM::FeatureExecuteOnly,
+ ARM::FeatureExpandMLx,
+ ARM::FeatureFP16,
+ ARM::FeatureFP16FML,
+ ARM::FeatureFP64,
+ ARM::FeatureFPAO,
+ ARM::FeatureFPARMv8,
+ ARM::FeatureFPARMv8_D16,
+ ARM::FeatureFPARMv8_D16_SP,
+ ARM::FeatureFPARMv8_SP,
+ ARM::FeatureFPRegs,
+ ARM::FeatureFPRegs16,
+ ARM::FeatureFPRegs64,
+ ARM::FeatureFullFP16,
+ ARM::FeatureFuseAES,
+ ARM::FeatureFuseLiterals,
+ ARM::FeatureHWDivARM,
+ ARM::FeatureHWDivThumb,
+ ARM::FeatureHasNoBranchPredictor,
+ ARM::FeatureHasRetAddrStack,
+ ARM::FeatureHasSlowFPVFMx,
+ ARM::FeatureHasSlowFPVMLx,
+ ARM::FeatureHasVMLxHazards,
+ ARM::FeatureLOB,
+ ARM::FeatureLongCalls,
+ ARM::FeatureMClass,
+ ARM::FeatureMP,
+ ARM::FeatureMVEVectorCostFactor1,
+ ARM::FeatureMVEVectorCostFactor2,
+ ARM::FeatureMVEVectorCostFactor4,
+ ARM::FeatureMatMulInt8,
+ ARM::FeatureMuxedUnits,
+ ARM::FeatureNEON,
+ ARM::FeatureNEONForFP,
+ ARM::FeatureNEONForFPMovs,
+ ARM::FeatureNoMovt,
+ ARM::FeatureNoNegativeImmediates,
+ ARM::FeatureNoPostRASched,
+ ARM::FeaturePerfMon,
+ ARM::FeaturePref32BitThumb,
+ ARM::FeaturePrefISHSTBarrier,
+ ARM::FeaturePreferBranchAlign32,
+ ARM::FeaturePreferBranchAlign64,
+ ARM::FeaturePreferVMOVSR,
+ ARM::FeatureProfUnpredicate,
+ ARM::FeatureRAS,
+ ARM::FeatureRClass,
+ ARM::FeatureReserveR9,
+ ARM::FeatureSB,
+ ARM::FeatureSHA2,
+ ARM::FeatureSlowFPBrcc,
+ ARM::FeatureSlowLoadDSubreg,
+ ARM::FeatureSlowOddRegister,
+ ARM::FeatureSlowVDUP32,
+ ARM::FeatureSlowVGETLNi32,
+ ARM::FeatureSplatVFPToNeon,
+ ARM::FeatureStrictAlign,
+ ARM::FeatureThumb2,
+ ARM::FeatureTrustZone,
+ ARM::FeatureUseMIPipeliner,
+ ARM::FeatureUseMISched,
+ ARM::FeatureUseWideStrideVFP,
+ ARM::FeatureV7Clrex,
+ ARM::FeatureVFP2,
+ ARM::FeatureVFP2_SP,
+ ARM::FeatureVFP3,
+ ARM::FeatureVFP3_D16,
+ ARM::FeatureVFP3_D16_SP,
+ ARM::FeatureVFP3_SP,
+ ARM::FeatureVFP4,
+ ARM::FeatureVFP4_D16,
+ ARM::FeatureVFP4_D16_SP,
+ ARM::FeatureVFP4_SP,
+ ARM::FeatureVMLxForwarding,
+ ARM::FeatureVirtualization,
+ ARM::FeatureZCZeroing,
+ ARM::HasMVEFloatOps,
+ ARM::HasMVEIntegerOps,
+ ARM::HasV5TEOps,
+ ARM::HasV5TOps,
+ ARM::HasV6KOps,
+ ARM::HasV6MOps,
+ ARM::HasV6Ops,
+ ARM::HasV6T2Ops,
+ ARM::HasV7Ops,
+ ARM::HasV8MBaselineOps,
+ ARM::HasV8MMainlineOps,
+ ARM::HasV8Ops,
+ ARM::HasV8_1MMainlineOps,
+ ARM::HasV8_1aOps,
+ ARM::HasV8_2aOps,
+ ARM::HasV8_3aOps,
+ ARM::HasV8_4aOps,
+ ARM::HasV8_5aOps,
+ ARM::HasV8_6aOps,
+ ARM::HasV8_7aOps,
+ ARM::HasV8_8aOps,
+ ARM::HasV8_9aOps,
+ ARM::HasV9_0aOps,
+ ARM::HasV9_1aOps,
+ ARM::HasV9_2aOps,
+ ARM::HasV9_3aOps,
+ ARM::HasV9_4aOps,
+ ARM::HasV9_5aOps,
+ ARM::HasV9_6aOps,
+ ARM::HasV9_7aOps};
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
diff --git a/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
new file mode 100644
index 0000000000000..2f8dbb7f01822
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes=inline | FileCheck %s
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+
+declare i32 @foo(...) #0
+
+define i32 @callee() #0 {
+entry:
+ %call = call i32 (...) @foo()
+ ret i32 %call
+}
+
+define i32 @dotcallee() #1 {
+entry:
+ %call = call i32 (...) @foo()
+ ret i32 %call
+}
+
+define i32 @dotcaller() #1 {
+entry:
+ %call = call i32 @callee()
+ ret i32 %call
+; CHECK-LABEL: dotcaller
+; CHECK: call i32 (...) @foo()
+}
+
+define i32 @caller() #0 {
+entry:
+ %call = call i32 @dotcallee()
+ ret i32 %call
+; CHECK-LABEL: caller
+; CHECK: call i32 @dotcallee()
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
+attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+dotprod" }
``````````
</details>
https://github.com/llvm/llvm-project/pull/177974
More information about the llvm-branch-commits
mailing list