[llvm] r310269 - AMDGPU: Use a custom areInlineCompatible

Mon Aug 7 10:08:44 PDT 2017

Author: arsenm
Date: Mon Aug  7 10:08:44 2017
New Revision: 310269

URL: http://llvm.org/viewvc/llvm-project?rev=310269&view=rev
Log:
AMDGPU: Use a custom areInlineCompatible

Fixes OpenCL library functions, which don't have an explicitly set
target-cpu, not being inlined on AMDGPU.

Added:
    llvm/trunk/test/Transforms/Inline/AMDGPU/
    llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
    llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=310269&r1=310268&r2=310269&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Mon Aug  7 10:08:44 2017
@@ -534,3 +534,16 @@ unsigned AMDGPUTTIImpl::getShuffleCost(T
 
   return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
 }
+
+bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
+                                        const Function *Callee) const {
+  // Compare subtarget features with InlineFeatureIgnoreList bits cleared.
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+  const FeatureBitset CallerFeatures =
+    TM.getSubtargetImpl(*Caller)->getFeatureBits() & ~InlineFeatureIgnoreList;
+  const FeatureBitset CalleeFeatures =
+    TM.getSubtargetImpl(*Callee)->getFeatureBits() & ~InlineFeatureIgnoreList;
+
+  // Compatible iff every remaining callee feature is also set in the caller.
+  return (CallerFeatures & CalleeFeatures) == CalleeFeatures;
+}

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=310269&r1=310268&r2=310269&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Mon Aug  7 10:08:44 2017
@@ -34,6 +34,32 @@ class AMDGPUTTIImpl final : public Basic
   const AMDGPUTargetLowering *TLI;
   bool IsGraphicsShader;
 
+  /// Feature bits cleared from caller and callee in areInlineCompatible().
+  const FeatureBitset InlineFeatureIgnoreList = {
+    // Codegen control options which don't matter for inline compatibility.
+    AMDGPU::FeatureEnableLoadStoreOpt,
+    AMDGPU::FeatureEnableSIScheduler,
+    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
+    AMDGPU::FeatureFlatForGlobal,
+    AMDGPU::FeaturePromoteAlloca,
+    AMDGPU::FeatureUnalignedBufferAccess,
+    AMDGPU::FeatureUnalignedScratchAccess,
+
+    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
+    AMDGPU::FeatureDebuggerEmitPrologue,
+    AMDGPU::FeatureDebuggerInsertNops,
+    AMDGPU::FeatureDebuggerReserveRegs,
+
+    // Properties of the kernel/environment which can't actually differ.
+    AMDGPU::FeatureSGPRInitBug,
+    AMDGPU::FeatureXNACK,
+    AMDGPU::FeatureTrapHandler,
+
+    // Perf-tuning features.
+    AMDGPU::FeatureFastFMAF32,
+    AMDGPU::HalfRate64Ops
+  };
+
   const AMDGPUSubtarget *getST() const { return ST; }
   const AMDGPUTargetLowering *getTLI() const { return TLI; }
 
@@ -121,6 +147,9 @@ public:
 
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                           Type *SubTp);
+
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const;
 };
 
 } // end namespace llvm

Added: llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll?rev=310269&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll (added)
+++ llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll Mon Aug  7 10:08:44 2017
@@ -0,0 +1,90 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
+
+; CHECK-LABEL: @func_no_target_cpu(
+define i32 @func_no_target_cpu() #0 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_call_no_target_cpu() #1 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @fp32_denormals(
+define i32 @fp32_denormals() #3 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
+; CHECK-NEXT: call i32 @fp32_denormals()
+define i32 @no_fp32_denormals_call_f32_denormals() #4 {
+  %call = call i32 @fp32_denormals()
+  ret i32 %call
+}
+
+; Make sure gfx9 can call unspecified functions because of movrel
+; feature change.
+; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+  %call = call i32 @func_no_target_cpu()
+  ret i32 %call
+}
+
+define i32 @func_no_halfrate64ops() #6 {
+  ret i32 0
+}
+
+define i32 @func_with_halfrate64ops() #7 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @call_func_without_halfrate64ops(
+; CHECK-NEXT: ret i32 0
+define i32 @call_func_without_halfrate64ops() #7 {
+  %call = call i32 @func_no_halfrate64ops()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @call_func_with_halfrate64ops(
+; CHECK-NEXT: ret i32 0
+define i32 @call_func_with_halfrate64ops() #6 {
+  %call = call i32 @func_with_halfrate64ops()
+  ret i32 %call
+}
+
+define i32 @func_no_loadstoreopt() #8 {
+  ret i32 0
+}
+
+define i32 @func_with_loadstoreopt() #9 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @call_func_without_loadstoreopt(
+; CHECK-NEXT: ret i32 0
+define i32 @call_func_without_loadstoreopt() #9 {
+  %call = call i32 @func_no_loadstoreopt()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "target-features"="-fp32-denormals" }
+attributes #5 = { nounwind "target-cpu"="gfx900" }
+attributes #6 = { nounwind "target-features"="-half-rate-64-ops" }
+attributes #7 = { nounwind "target-features"="+half-rate-64-ops" }
+attributes #8 = { nounwind "target-features"="-load-store-opt" }
+attributes #9 = { nounwind "target-features"="+load-store-opt" }

Added: llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg?rev=310269&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg Mon Aug  7 10:08:44 2017
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True