[llvm] r273940 - AMDGPU: Implement per-function subtargets

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 27 13:48:03 PDT 2016


Author: arsenm
Date: Mon Jun 27 15:48:03 2016
New Revision: 273940

URL: http://llvm.org/viewvc/llvm-project?rev=273940&view=rev
Log:
AMDGPU: Implement per-function subtargets

Added:
    llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Jun 27 15:48:03 2016
@@ -202,17 +202,7 @@ SISubtarget::SISubtarget(const Triple &T
   AMDGPUSubtarget(TT, GPU, FS, TM),
   InstrInfo(*this),
   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
-  TLInfo(TM, *this) {
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-  GISelAccessor *GISel = new GISelAccessor();
-#else
-  AMDGPUGISelActualAccessor *GISel =
-    new AMDGPUGISelActualAccessor();
-  GISel->CallLoweringInfo.reset(
-    new AMDGPUCallLowering(*getTargetLowering()));
-#endif
-  setGISelAccessor(*GISel);
-}
+  TLInfo(TM, *this) {}
 
 unsigned R600Subtarget::getStackEntrySize() const {
   switch (getWavefrontSize()) {

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jun 27 15:48:03 2016
@@ -145,6 +145,20 @@ AMDGPUTargetMachine::AMDGPUTargetMachine
 
 AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
 
+StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
+  Attribute GPUAttr = F.getFnAttribute("target-cpu");
+  return GPUAttr.hasAttribute(Attribute::None) ?
+    getTargetCPU() : GPUAttr.getValueAsString();
+}
+
+StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
+  Attribute FSAttr = F.getFnAttribute("target-features");
+
+  return FSAttr.hasAttribute(Attribute::None) ?
+    getTargetFeatureString() :
+    FSAttr.getValueAsString();
+}
+
 //===----------------------------------------------------------------------===//
 // R600 Target Machine (R600 -> Cayman)
 //===----------------------------------------------------------------------===//
@@ -154,8 +168,27 @@ R600TargetMachine::R600TargetMachine(con
                                      TargetOptions Options,
                                      Optional<Reloc::Model> RM,
                                      CodeModel::Model CM, CodeGenOpt::Level OL)
-  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, getTargetCPU(), FS, *this) {}
+  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+
+const R600Subtarget *R600TargetMachine::getSubtargetImpl(
+  const Function &F) const {
+  StringRef GPU = getGPUName(F);
+  StringRef FS = getFeatureString(F);
+
+  SmallString<128> SubtargetKey(GPU);
+  SubtargetKey.append(FS);
+
+  auto &I = SubtargetMap[SubtargetKey];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
+    resetTargetOptions(F);
+    I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
+  }
+
+  return I.get();
+}
 
 //===----------------------------------------------------------------------===//
 // GCN Target Machine (SI+)
@@ -166,8 +199,34 @@ GCNTargetMachine::GCNTargetMachine(const
                                    TargetOptions Options,
                                    Optional<Reloc::Model> RM,
                                    CodeModel::Model CM, CodeGenOpt::Level OL)
-  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, getTargetCPU(), FS, *this) {}
+  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+
+const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
+  StringRef GPU = getGPUName(F);
+  StringRef FS = getFeatureString(F);
+
+  SmallString<128> SubtargetKey(GPU);
+  SubtargetKey.append(FS);
+
+  auto &I = SubtargetMap[SubtargetKey];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
+    resetTargetOptions(F);
+    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+    GISelAccessor *GISel = new GISelAccessor();
+#else
+    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
+#endif
+
+    I->setGISelAccessor(*GISel);
+  }
+
+  return I.get();
+}
 
 //===----------------------------------------------------------------------===//
 // AMDGPU Pass Setup
@@ -244,8 +303,7 @@ public:
 
 TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
   return TargetIRAnalysis([this](const Function &F) {
-    return TargetTransformInfo(
-        AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
+    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
   });
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h Mon Jun 27 15:48:03 2016
@@ -29,6 +29,9 @@ protected:
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
   AMDGPUIntrinsicInfo IntrinsicInfo;
 
+  StringRef getGPUName(const Function &F) const;
+  StringRef getFeatureString(const Function &F) const;
+
 public:
   AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
                       StringRef FS, TargetOptions Options,
@@ -55,7 +58,7 @@ public:
 
 class R600TargetMachine final : public AMDGPUTargetMachine {
 private:
-  R600Subtarget Subtarget;
+  mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
 
 public:
   R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -65,13 +68,7 @@ public:
 
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
-  const R600Subtarget *getSubtargetImpl() const {
-    return &Subtarget;
-  }
-
-  const R600Subtarget *getSubtargetImpl(const Function &) const override {
-    return &Subtarget;
-  }
+  const R600Subtarget *getSubtargetImpl(const Function &) const override;
 };
 
 //===----------------------------------------------------------------------===//
@@ -80,7 +77,7 @@ public:
 
 class GCNTargetMachine final : public AMDGPUTargetMachine {
 private:
-    SISubtarget Subtarget;
+  mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
 
 public:
   GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -90,21 +87,9 @@ public:
 
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
-  const SISubtarget *getSubtargetImpl() const {
-    return &Subtarget;
-  }
-
-  const SISubtarget *getSubtargetImpl(const Function &) const override {
-    return &Subtarget;
-  }
+  const SISubtarget *getSubtargetImpl(const Function &) const override;
 };
 
-inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const {
-  if (getTargetTriple().getArch() == Triple::amdgcn)
-    return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl();
-  return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl();
-}
-
 inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
   const Function &F) const {
   if (getTargetTriple().getArch() == Triple::amdgcn)

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=273940&r1=273939&r2=273940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Mon Jun 27 15:48:03 2016
@@ -59,9 +59,10 @@ class AMDGPUTTIImpl final : public Basic
   }
 
 public:
-  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL)
-      : BaseT(TM, DL), ST(TM->getSubtargetImpl()),
-        TLI(ST->getTargetLowering()) {}
+  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+    : BaseT(TM, F.getParent()->getDataLayout()),
+      ST(TM->getSubtargetImpl(F)),
+      TLI(ST->getTargetLowering()) {}
 
   // Provide value semantics. MSVC requires that we spell all of these out.
   AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)

Added: llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll?rev=273940&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/target-cpu.ll Mon Jun 27 15:48:03 2016
@@ -0,0 +1,112 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; CI+ intrinsic
+declare void @llvm.amdgcn.s.dcache.inv.vol() #0
+
+; VI+ intrinsic
+declare void @llvm.amdgcn.s.dcache.wb() #0
+
+; CHECK-LABEL: {{^}}target_none:
+; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @target_none() #0 {
+  %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+  %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+  %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+  %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.ext = sext i32 %id to i64
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+  store i32 0, i32 addrspace(1)* %gep
+  ret void
+}
+
+; CHECK-LABEL: {{^}}target_tahiti:
+; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @target_tahiti() #2 {
+  %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+  %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+  %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+  %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.ext = sext i32 %id to i64
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+  store i32 0, i32 addrspace(1)* %gep
+  ret void
+}
+
+; CHECK-LABEL: {{^}}target_bonaire:
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; CHECK: s_dcache_inv_vol
+define void @target_bonaire() #3 {
+  %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+  %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+  %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+  %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.ext = sext i32 %id to i64
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+  store i32 0, i32 addrspace(1)* %gep
+  call void @llvm.amdgcn.s.dcache.inv.vol()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}target_fiji:
+; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x400
+; CHECK: flat_store_dword
+; CHECK: s_dcache_wb{{$}}
+define void @target_fiji() #4 {
+  %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+  %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
+  %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
+  %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.ext = sext i32 %id to i64
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
+  store i32 0, i32 addrspace(1)* %gep
+  call void @llvm.amdgcn.s.dcache.wb()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}promote_alloca_enabled:
+; CHECK: ds_read_b32
+; CHECK: ; LDSByteSize: 5120
+define void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %tmp = load i32, i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
+  %load = load i32, i32* %arrayidx1
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}promote_alloca_disabled:
+; CHECK: SCRATCH_RSRC_DWORD0
+; CHECK: SCRATCH_RSRC_DWORD1
+; CHECK: ScratchSize: 24
+define void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %tmp = load i32, i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
+  %load = load i32, i32* %arrayidx1
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "target-cpu"="tahiti" }
+attributes #3 = { nounwind "target-cpu"="bonaire" }
+attributes #4 = { nounwind "target-cpu"="fiji" }
+attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-max-waves-per-eu"="3" }
+attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-max-waves-per-eu"="3" }




More information about the llvm-commits mailing list