[llvm] r273940 - AMDGPU: Implement per-function subtargets
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 27 13:48:03 PDT 2016
Author: arsenm
Date: Mon Jun 27 15:48:03 2016
New Revision: 273940
URL: http://llvm.org/viewvc/llvm-project?rev=273940&view=rev
Log:
AMDGPU: Implement per-function subtargets
Added:
llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Jun 27 15:48:03 2016
@@ -202,17 +202,7 @@ SISubtarget::SISubtarget(const Triple &T
AMDGPUSubtarget(TT, GPU, FS, TM),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- TLInfo(TM, *this) {
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- AMDGPUGISelActualAccessor *GISel =
- new AMDGPUGISelActualAccessor();
- GISel->CallLoweringInfo.reset(
- new AMDGPUCallLowering(*getTargetLowering()));
-#endif
- setGISelAccessor(*GISel);
-}
+ TLInfo(TM, *this) {}
unsigned R600Subtarget::getStackEntrySize() const {
switch (getWavefrontSize()) {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jun 27 15:48:03 2016
@@ -145,6 +145,20 @@ AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
+StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const { // GPU name to use when building the subtarget for F.
+ Attribute GPUAttr = F.getFnAttribute("target-cpu"); // Per-function override, if F carries one.
+ return GPUAttr.hasAttribute(Attribute::None) ? // None-kind result: F has no "target-cpu" attribute,
+ getTargetCPU() : GPUAttr.getValueAsString(); // so fall back to the TargetMachine-wide CPU; otherwise use F's value.
+}
+
+StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { // Feature string to use when building the subtarget for F.
+ Attribute FSAttr = F.getFnAttribute("target-features"); // Per-function override, if F carries one.
+
+ return FSAttr.hasAttribute(Attribute::None) ? // None-kind result: F has no "target-features" attribute,
+ getTargetFeatureString() : // so fall back to the TargetMachine-wide feature string;
+ FSAttr.getValueAsString(); // otherwise use the string attached to F.
+}
+
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//
@@ -154,8 +168,27 @@ R600TargetMachine::R600TargetMachine(con
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, getTargetCPU(), FS, *this) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+
+const R600Subtarget *R600TargetMachine::getSubtargetImpl( // Returns the R600Subtarget for F, creating and caching it on first use.
+ const Function &F) const {
+ StringRef GPU = getGPUName(F); // F's "target-cpu", or the TargetMachine default.
+ StringRef FS = getFeatureString(F); // F's "target-features", or the TargetMachine default.
+
+ SmallString<128> SubtargetKey(GPU); // Cache key: GPU name immediately followed by the feature string.
+ SubtargetKey.append(FS);
+
+ auto &I = SubtargetMap[SubtargetKey]; // Reference to the map slot; null when no subtarget exists for this key yet.
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this); // Stored through the reference, so the map owns it.
+ }
+
+ return I.get(); // Non-owning pointer; the object stays owned by SubtargetMap.
+}
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
@@ -166,8 +199,34 @@ GCNTargetMachine::GCNTargetMachine(const
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, getTargetCPU(), FS, *this) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+
+const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { // Returns the SISubtarget for F, creating and caching it on first use.
+ StringRef GPU = getGPUName(F); // F's "target-cpu", or the TargetMachine default.
+ StringRef FS = getFeatureString(F); // F's "target-features", or the TargetMachine default.
+
+ SmallString<128> SubtargetKey(GPU); // Cache key: GPU name immediately followed by the feature string.
+ SubtargetKey.append(FS);
+
+ auto &I = SubtargetMap[SubtargetKey]; // Reference to the map slot; null when no subtarget exists for this key yet.
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this); // Stored through the reference, so the map owns it.
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor(); // Plain base accessor when GlobalISel is not built.
+#else
+ SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); // NOTE(review): the code this replaces in SISubtarget's constructor also reset CallLoweringInfo with an AMDGPUCallLowering; nothing here does - confirm it is populated elsewhere.
+#endif
+
+ I->setGISelAccessor(*GISel); // Presumably the subtarget takes ownership of GISel - confirm against setGISelAccessor.
+ }
+
+ return I.get(); // Non-owning pointer; the object stays owned by SubtargetMap.
+}
//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
@@ -244,8 +303,7 @@ public:
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(
- AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
+ return TargetTransformInfo(AMDGPUTTIImpl(this, F)); // Pass F itself so the TTI implementation can look up F's own subtarget.
});
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h Mon Jun 27 15:48:03 2016
@@ -29,6 +29,9 @@ protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
+ StringRef getGPUName(const Function &F) const;
+ StringRef getFeatureString(const Function &F) const;
+
public:
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
@@ -55,7 +58,7 @@ public:
class R600TargetMachine final : public AMDGPUTargetMachine {
private:
- R600Subtarget Subtarget;
+ mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
public:
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -65,13 +68,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- const R600Subtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
-
- const R600Subtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const R600Subtarget *getSubtargetImpl(const Function &) const override;
};
//===----------------------------------------------------------------------===//
@@ -80,7 +77,7 @@ public:
class GCNTargetMachine final : public AMDGPUTargetMachine {
private:
- SISubtarget Subtarget;
+ mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
public:
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -90,21 +87,9 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- const SISubtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
-
- const SISubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const SISubtarget *getSubtargetImpl(const Function &) const override;
};
-inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const {
- if (getTargetTriple().getArch() == Triple::amdgcn)
- return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl();
- return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl();
-}
-
inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
const Function &F) const {
if (getTargetTriple().getArch() == Triple::amdgcn)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Mon Jun 27 15:48:03 2016
@@ -59,9 +59,10 @@ class AMDGPUTTIImpl final : public Basic
}
public:
- explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL)
- : BaseT(TM, DL), ST(TM->getSubtargetImpl()),
- TLI(ST->getTargetLowering()) {}
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) // Builds the TTI implementation for one specific function.
+ : BaseT(TM, F.getParent()->getDataLayout()), // DataLayout is taken from F's parent.
+ ST(TM->getSubtargetImpl(F)), // Subtarget selected for F rather than a single TargetMachine-wide one.
+ TLI(ST->getTargetLowering()) {} // Lowering info comes from that same subtarget.
// Provide value semantics. MSVC requires that we spell all of these out.
AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
Added: llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll?rev=273940&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll Mon Jun 27 15:48:03 2016
@@ -0,0 +1,112 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; CI+ intrinsic
+declare void @llvm.amdgcn.s.dcache.inv.vol() #0
+
+; VI+ intrinsic
+declare void @llvm.amdgcn.s.dcache.wb() #0
+
+; CHECK-LABEL: {{^}}target_none:
+; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @target_none() #0 { ; Group #0 has no "target-cpu"/"target-features", so the llc defaults apply.
+ %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ %id.ext = sext i32 %id to i64
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+; CHECK-LABEL: {{^}}target_tahiti:
+; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; Uses attribute group #2 ("target-cpu"="tahiti") so this function really
+; selects the tahiti subtarget. Group #1 is the readnone group used by the
+; intrinsic declarations and is not valid for a function that stores to memory.
+define void @target_tahiti() #2 {
+ %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ %id.ext = sext i32 %id to i64
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+; CHECK-LABEL: {{^}}target_bonaire:
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; CHECK: s_dcache_inv_vol
+define void @target_bonaire() #3 { ; Group #3 sets "target-cpu"="bonaire"; the body also calls the intrinsic declared above as CI+.
+ %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ %id.ext = sext i32 %id to i64
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+ store i32 0, i32 addrspace(1)* %gep
+ call void @llvm.amdgcn.s.dcache.inv.vol()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}target_fiji:
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x400
+; CHECK: flat_store_dword
+; CHECK: s_dcache_wb{{$}}
+define void @target_fiji() #4 { ; Group #4 sets "target-cpu"="fiji"; the body also calls the intrinsic declared above as VI+.
+ %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+ %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+ %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ %id.ext = sext i32 %id to i64
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+ store i32 0, i32 addrspace(1)* %gep
+ call void @llvm.amdgcn.s.dcache.wb()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}promote_alloca_enabled:
+; CHECK: ds_read_b32
+; CHECK: ; LDSByteSize: 5120
+define void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 { ; Group #5 sets "target-features"="+promote-alloca".
+entry:
+ %stack = alloca [5 x i32], align 4
+ %tmp = load i32, i32 addrspace(1)* %in, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
+ %load = load i32, i32* %arrayidx1
+ store i32 %load, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}promote_alloca_disabled:
+; CHECK: SCRATCH_RSRC_DWORD0
+; CHECK: SCRATCH_RSRC_DWORD1
+; CHECK: ScratchSize: 24
+define void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 { ; Group #6 sets "target-features"="-promote-alloca".
+entry:
+ %stack = alloca [5 x i32], align 4
+ %tmp = load i32, i32 addrspace(1)* %in, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
+ %load = load i32, i32* %arrayidx1
+ store i32 %load, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "target-cpu"="tahiti" }
+attributes #3 = { nounwind "target-cpu"="bonaire" }
+attributes #4 = { nounwind "target-cpu"="fiji" }
+attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-max-waves-per-eu"="3" }
+attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-max-waves-per-eu"="3" }
More information about the llvm-commits
mailing list