[llvm] r354355 - [X86] Filter out tuning feature flags and a few ISA feature flags when checking for function inline compatibility.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 19 09:05:11 PST 2019


Author: ctopper
Date: Tue Feb 19 09:05:11 2019
New Revision: 354355

URL: http://llvm.org/viewvc/llvm-project?rev=354355&view=rev
Log:
[X86] Filter out tuning feature flags and a few ISA feature flags when checking for function inline compatibility.

Tuning flags don't have any effect on the available instructions, so they aren't a good reason to prevent inlining.

There are also some ISA flags that don't have any intrinsics or ABI requirements that we can exclude. I've put only the most basic ones like cmpxchg16b and lahfsahf. These are interesting because they aren't present in all 64-bit CPUs, but we have codegen workarounds when they aren't present.

Loosening these checks can help with scenarios where a caller has a more specific CPU than a callee. The default tuning flags on our generic 'x86-64' CPU can currently make it inline incompatible with other CPUs. I've also added an example test for 'nocona' and 'prescott' where 'nocona' is just a 64-bit capable version of 'prescott' but in 32-bit mode they should be completely compatible.

I've based the implementation here on the similar code in AMDGPU.

Differential Revision: https://reviews.llvm.org/D58371

Added:
    llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll
    llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll
Modified:
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=354355&r1=354354&r2=354355&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Tue Feb 19 09:05:11 2019
@@ -3065,10 +3065,9 @@ bool X86TTIImpl::areInlineCompatible(con
   const FeatureBitset &CalleeBits =
       TM.getSubtargetImpl(*Callee)->getFeatureBits();
 
-  // FIXME: This is likely too limiting as it will include subtarget features
-  // that we might not care about for inlining, but it is conservatively
-  // correct.
-  return (CallerBits & CalleeBits) == CalleeBits;
+  FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
+  FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
+  return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
 }
 
 const X86TTIImpl::TTI::MemCmpExpansionOptions *

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h?rev=354355&r1=354354&r2=354355&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h Tue Feb 19 09:05:11 2019
@@ -35,6 +35,60 @@ class X86TTIImpl : public BasicTTIImplBa
   const X86Subtarget *getST() const { return ST; }
   const X86TargetLowering *getTLI() const { return TLI; }
 
+  const FeatureBitset InlineFeatureIgnoreList = {
+    // This indicates the CPU is 64 bit capable not that we are in 64-bit mode.
+    X86::Feature64Bit,
+
+    // These features don't have any intrinsics or ABI effect.
+    X86::FeatureNOPL,
+    X86::FeatureCMPXCHG16B,
+    X86::FeatureLAHFSAHF,
+
+    // Codegen control options.
+    X86::FeatureFast11ByteNOP,
+    X86::FeatureFast15ByteNOP,
+    X86::FeatureFastBEXTR,
+    X86::FeatureFastHorizontalOps,
+    X86::FeatureFastLZCNT,
+    X86::FeatureFastPartialYMMorZMMWrite,
+    X86::FeatureFastScalarFSQRT,
+    X86::FeatureFastSHLDRotate,
+    X86::FeatureFastVariableShuffle,
+    X86::FeatureFastVectorFSQRT,
+    X86::FeatureLEAForSP,
+    X86::FeatureLEAUsesAG,
+    X86::FeatureLZCNTFalseDeps,
+    X86::FeatureMacroFusion,
+    X86::FeatureMergeToThreeWayBranch,
+    X86::FeaturePadShortFunctions,
+    X86::FeaturePOPCNTFalseDeps,
+    X86::FeatureSSEUnalignedMem,
+    X86::FeatureSlow3OpsLEA,
+    X86::FeatureSlowDivide32,
+    X86::FeatureSlowDivide64,
+    X86::FeatureSlowIncDec,
+    X86::FeatureSlowLEA,
+    X86::FeatureSlowPMADDWD,
+    X86::FeatureSlowPMULLD,
+    X86::FeatureSlowSHLD,
+    X86::FeatureSlowTwoMemOps,
+    X86::FeatureSlowUAMem16,
+
+    // Perf-tuning flags.
+    X86::FeatureHasFastGather,
+    X86::FeatureSlowUAMem32,
+
+    // Based on whether user set the -mprefer-vector-width command line.
+    X86::FeaturePrefer256Bit,
+
+    // CPU name enums. These just follow CPU string.
+    X86::ProcIntelAtom,
+    X86::ProcIntelGLM,
+    X86::ProcIntelGLP,
+    X86::ProcIntelSLM,
+    X86::ProcIntelTRM,
+  };
+
 public:
   explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),

Added: llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll?rev=354355&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll (added)
+++ llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-i686.ll Tue Feb 19 09:05:11 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -mtriple=i686-unknown-unknown -S -inline | FileCheck %s
+
+define i32 @func_target_cpu_nocona() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 {
+  %call = call i32 @func_target_cpu_nocona()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind "target-cpu"="nocona" }
+attributes #1 = { nounwind "target-cpu"="prescott" }

Added: llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll?rev=354355&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll (added)
+++ llvm/trunk/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll Tue Feb 19 09:05:11 2019
@@ -0,0 +1,43 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s
+
+define i32 @func_target_cpu_base() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_k8_call_target_cpu_base() #1 {
+  %call = call i32 @func_target_cpu_base()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 {
+  %call = call i32 @func_target_cpu_base()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 {
+  %call = call i32 @func_target_cpu_base()
+  ret i32 %call
+}
+
+define i32 @func_target_cpu_nocona() #4 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 {
+  %call = call i32 @func_target_cpu_nocona()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind "target-cpu"="x86-64" }
+attributes #1 = { nounwind "target-cpu"="k8" }
+attributes #2 = { nounwind "target-cpu"="nehalem" }
+attributes #3 = { nounwind "target-cpu"="goldmont" }
+attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" }




More information about the llvm-commits mailing list