[llvm] [PowerPC] Inline callee if its target-features are a subset of the caller (PR #67710)

Shimin Cui via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 28 10:06:55 PDT 2023


https://github.com/scui-ibm created https://github.com/llvm/llvm-project/pull/67710

Similar to other platforms (X86, ARM), it should be safe to inline callees if their target-features
are a subset of the caller's.


>From b0317c5603728f60ca529eea0b8fcda656bf40f8 Mon Sep 17 00:00:00 2001
From: Shimin Cui <scui at ca.ibm.com>
Date: Thu, 28 Sep 2023 12:59:05 -0400
Subject: [PATCH] [PowerPC] Inline callee if its target-features are a subset
 of the caller

---
 .../Target/PowerPC/PPCTargetTransformInfo.cpp | 13 +++++++
 .../Target/PowerPC/PPCTargetTransformInfo.h   |  2 +
 .../Inline/PowerPC/inline-target-features.ll  | 37 +++++++++++++++++++
 3 files changed, 52 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/PowerPC/inline-target-features.ll

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ca0f2c2e18af5f9..e378e5f671cb71e 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -885,6 +885,19 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
+bool PPCTTIImpl::areInlineCompatible(const Function *Caller,
+                                     const Function *Callee) const {
+  // Allow inlining only when the Callee has a subset of the Caller's features.
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  const FeatureBitset &CallerBits =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits();
+  const FeatureBitset &CalleeBits =
+      TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+  return (CallerBits & CalleeBits) == CalleeBits;
+}
+
 bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
                                        const Function *Callee,
                                        const ArrayRef<Type *> &Types) const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index c3ade9968c336a0..bdc2f17d95c03c2 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -138,6 +138,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                         TTI::TargetCostKind CostKind);
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const;
   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                              const ArrayRef<Type *> &Types) const;
   bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
diff --git a/llvm/test/Transforms/Inline/PowerPC/inline-target-features.ll b/llvm/test/Transforms/Inline/PowerPC/inline-target-features.ll
new file mode 100644
index 000000000000000..2b703cfaf87d4ac
--- /dev/null
+++ b/llvm/test/Transforms/Inline/PowerPC/inline-target-features.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -mtriple=powerpc64le-unknown-linux-gnu -S -passes=inline | FileCheck %s
+; Check that we only inline when we have compatible target features.
+
+target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define i32 @f1() #0 {
+; CHECK-LABEL: define i32 @f1(
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (...) @f0()
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = call i32 (...) @f0()
+  ret i32 %call
+}
+
+define i32 @f2() #1 {
+; CHECK-LABEL: define i32 @f2(
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i32 (...) @f0()
+; CHECK-NEXT:    ret i32 [[CALL_I]]
+;
+  %call = call i32 @f1()
+  ret i32 %call
+}
+
+define i32 @f3() #0 {
+; CHECK-LABEL: define i32 @f3(
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @f2()
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = call i32 @f2()
+  ret i32 %call
+}
+
+declare i32 @f0(...) #0
+
+attributes #0 = { "target-cpu"="pwr7" "target-features"="-crbits,-crypto,-direct-move,-isa-v207-instructions,-power8-vector" }
+attributes #1 = { "target-cpu"="pwr8" "target-features"="+crbits,+crypto,+direct-move,+isa-v207-instructions,+power8-vector" }



More information about the llvm-commits mailing list