[llvm] r310269 - AMDGPU: Use a custom areInlineCompatible
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 7 10:08:44 PDT 2017
Author: arsenm
Date: Mon Aug 7 10:08:44 2017
New Revision: 310269
URL: http://llvm.org/viewvc/llvm-project?rev=310269&view=rev
Log:
AMDGPU: Use a custom areInlineCompatible
Fixes the failure to inline OpenCL library functions on AMDGPU;
these functions don't have an explicitly set target-cpu.
Added:
llvm/trunk/test/Transforms/Inline/AMDGPU/
llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=310269&r1=310268&r2=310269&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Mon Aug 7 10:08:44 2017
@@ -534,3 +534,16 @@ unsigned AMDGPUTTIImpl::getShuffleCost(T
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
+// Callee can be inlined into Caller only if every callee subtarget feature outside InlineFeatureIgnoreList is also set for the caller.
+bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+ // Clear the ignored features on both sides so they cannot block inlining.
+ FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
+ FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
+ return ((RealCallerBits & RealCalleeBits) == RealCalleeBits); // callee bits must be a subset of caller bits
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=310269&r1=310268&r2=310269&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Mon Aug 7 10:08:44 2017
@@ -34,6 +34,32 @@ class AMDGPUTTIImpl final : public Basic
const AMDGPUTargetLowering *TLI;
bool IsGraphicsShader;
+ // Subtarget features masked out of both caller and callee feature sets by areInlineCompatible.
+ const FeatureBitset InlineFeatureIgnoreList = {
+ // Codegen control options which don't matter.
+ AMDGPU::FeatureEnableLoadStoreOpt,
+ AMDGPU::FeatureEnableSIScheduler,
+ AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
+ AMDGPU::FeatureFlatForGlobal,
+ AMDGPU::FeaturePromoteAlloca,
+ AMDGPU::FeatureUnalignedBufferAccess,
+ AMDGPU::FeatureUnalignedScratchAccess,
+
+ AMDGPU::FeatureAutoWaitcntBeforeBarrier,
+ AMDGPU::FeatureDebuggerEmitPrologue,
+ AMDGPU::FeatureDebuggerInsertNops,
+ AMDGPU::FeatureDebuggerReserveRegs,
+
+ // Property of the kernel/environment which can't actually differ.
+ AMDGPU::FeatureSGPRInitBug,
+ AMDGPU::FeatureXNACK,
+ AMDGPU::FeatureTrapHandler,
+
+ // Perf-tuning features
+ AMDGPU::FeatureFastFMAF32,
+ AMDGPU::HalfRate64Ops
+ };
+
const AMDGPUSubtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
@@ -121,6 +147,9 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
+ // Returns true when Callee's subtarget features (minus InlineFeatureIgnoreList) are a subset of Caller's.
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
};
} // end namespace llvm
Added: llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll?rev=310269&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll (added)
+++ llvm/trunk/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll Mon Aug 7 10:08:44 2017
@@ -0,0 +1,90 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
+
+; CHECK-LABEL: @func_no_target_cpu(
+define i32 @func_no_target_cpu() #0 {
+ ret i32 0
+}
+
+; CHECK-LABEL: @target_cpu_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_call_no_target_cpu() #1 {
+ %call = call i32 @func_no_target_cpu()
+ ret i32 %call
+}
+
+; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+ %call = call i32 @func_no_target_cpu()
+ ret i32 %call
+}
+
+; CHECK-LABEL: @fp32_denormals(
+define i32 @fp32_denormals() #3 {
+ ret i32 0
+}
+
+; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
+; CHECK-NEXT: call i32 @fp32_denormals()
+define i32 @no_fp32_denormals_call_f32_denormals() #4 {
+ %call = call i32 @fp32_denormals()
+ ret i32 %call
+}
+
+; Make sure gfx9 can call unspecified functions because of movrel
+; feature change.
+; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
+; CHECK-NEXT: ret i32 0
+define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+ %call = call i32 @func_no_target_cpu()
+ ret i32 %call
+}
+
+define i32 @func_no_halfrate64ops() #6 {
+ ret i32 0
+}
+
+define i32 @func_with_halfrate64ops() #7 {
+ ret i32 0
+}
+
+; CHECK-LABEL: @call_func_without_halfrate64ops(
+; CHECK-NEXT: ret i32 0
+define i32 @call_func_without_halfrate64ops() #7 {
+ %call = call i32 @func_no_halfrate64ops()
+ ret i32 %call
+}
+
+; CHECK-LABEL: @call_func_with_halfrate64ops(
+; CHECK-NEXT: ret i32 0
+define i32 @call_func_with_halfrate64ops() #6 {
+ %call = call i32 @func_with_halfrate64ops()
+ ret i32 %call
+}
+
+define i32 @func_no_loadstoreopt() #8 {
+ ret i32 0
+}
+
+define i32 @func_with_loadstoreopt() #9 {
+ ret i32 0
+}
+
+; CHECK-LABEL: @call_func_without_loadstoreopt(
+; CHECK-NEXT: ret i32 0
+define i32 @call_func_without_loadstoreopt() #9 {
+ %call = call i32 @func_no_loadstoreopt()
+ ret i32 %call
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "target-features"="-fp32-denormals" }
+attributes #5 = { nounwind "target-cpu"="gfx900" }
+attributes #6 = { nounwind "target-features"="-half-rate-64-ops" }
+attributes #7 = { nounwind "target-features"="+half-rate-64-ops" }
+attributes #8 = { nounwind "target-features"="-load-store-opt" }
+attributes #9 = { nounwind "target-features"="+load-store-opt" }
Added: llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg?rev=310269&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/Inline/AMDGPU/lit.local.cfg Mon Aug 7 10:08:44 2017
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
More information about the llvm-commits
mailing list