[PATCH] D16594: AMDGPU: Fix default device handling

Tue Jan 26 11:59:59 PST 2016

arsenm created this revision.
arsenm added a reviewer: tstellarAMD.
arsenm added a subscriber: llvm-commits.
Herald added a subscriber: arsenm.

When no device name is specified, default to kaveri
for HSA since SI is not supported and it woud fail.
    
Move default device handling to the TargetMachine
rather than the AMDGPUSubtarget. The module ISA version
is computed from the device name provided with the target
machine, so the attributes printed by the AsmPrinter were
inconsistent with those computed in the subtarget.
    
Also remove DevName field from subtarget since it's redundant
with getCPU() in the superclass.


http://reviews.llvm.org/D16594

Files:
  lib/Target/AMDGPU/AMDGPUSubtarget.cpp
  lib/Target/AMDGPU/AMDGPUSubtarget.h
  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  test/CodeGen/AMDGPU/hsa-default-device.ll

Index: test/CodeGen/AMDGPU/hsa-default-device.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/hsa-default-device.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+
+; Make sure that with an HSA triple, we don't default to an
+; unsupported device.
+
+; CHECK: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+  store float 0.0, float addrspace(1)* %out0
+  ret void
+}
+
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -88,14 +88,28 @@
   return Ret;
 }
 
+LLVM_READNONE
+static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
+  if (!GPU.empty())
+    return GPU;
+
+  // HSA only supports CI+, so change the default GPU to a CI for HSA.
+  if (TT.getArch() == Triple::amdgcn)
+    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";
+
+  return "";
+}
+
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                          StringRef CPU, StringRef FS,
                                          TargetOptions Options, Reloc::Model RM,
                                          CodeModel::Model CM,
                                          CodeGenOpt::Level OptLevel)
-    : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+    : LLVMTargetMachine(T, computeDataLayout(TT), TT,
+                        getGPUOrDefault(TT, CPU), FS, Options, RM, CM,
                         OptLevel),
-      TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this),
+      TLOF(createTLOF(getTargetTriple())),
+      Subtarget(TT, getTargetCPU(), FS, *this),
       IntrinsicInfo() {
   setRequiresStructuredCFG(true);
   initAsmInfo();
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -58,7 +58,6 @@
   };
 
 private:
-  std::string DevName;
   bool DumpCode;
   bool R600ALUInst;
   bool HasVertexCache;
@@ -274,10 +273,6 @@
     return false;
   }
 
-  StringRef getDeviceName() const {
-    return DevName;
-  }
-
   bool enableHugeScratchBuffer() const {
     return EnableHugeScratchBuffer;
   }
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -49,9 +49,6 @@
     FullFS += "+flat-for-global,";
   FullFS += FS;
 
-  if (GPU == "" && TT.getArch() == Triple::amdgcn)
-    GPU = "SI";
-
   ParseSubtargetFeatures(GPU, FullFS);
 
   // FIXME: I don't think think Evergreen has any useful support for
@@ -66,7 +63,7 @@
 
 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                  TargetMachine &TM)
-    : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU),
+    : AMDGPUGenSubtargetInfo(TT, GPU, FS),
       DumpCode(false), R600ALUInst(false), HasVertexCache(false),
       TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
       FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16594.46025.patch
Type: text/x-patch
Size: 3415 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160126/7263f8fc/attachment.bin>