<html>
  <head>
    <meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
  </head>
  <body bgcolor="#FFFFFF" text="#000000">
    <div class="moz-cite-prefix">On 11/06/2014 02:23 PM, Tom Stellard
      wrote:<br>
    </div>
    <blockquote cite="mid:20141106222332.GA7255@freedesktop.org"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">Hi,

Attached are patches to enable the R600 backend to emit code for the HSA runtime.
Please review.

-Tom
</pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0001-Triple-Add-AMDGPU-evironment-type.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 01af45d2e2f79add5047e53738e6e4eff77354b0 Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Wed, 22 Oct 2014 20:07:50 -0400
Subject: [PATCH 1/5] Triple: Add AMDGPU evironment type

This will be used to tell the R600 backend how to emit program data
in its object files.
---
 include/llvm/ADT/Triple.h | 1 +
 lib/Support/Triple.cpp    | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 4432390..c1b7882 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -155,6 +155,7 @@ public:
     MSVC,
     Itanium,
     Cygnus,
+    AMDGPU
   };
   enum ObjectFormatType {
     UnknownObjectFormat,
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 7a9dc39..c9fc785 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -176,6 +176,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case MSVC: return "msvc";
   case Itanium: return "itanium";
   case Cygnus: return "cygnus";
+  case AMDGPU: return "amdgpu";
   }
 
   llvm_unreachable("Invalid EnvironmentType!");
@@ -326,6 +327,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
     .StartsWith("msvc", Triple::MSVC)
     .StartsWith("itanium", Triple::Itanium)
     .StartsWith("cygnus", Triple::Cygnus)
+    .StartsWith("amdgpu", Triple::AMDGPU)
     .Default(Triple::UnknownEnvironment);
 }
 
<div class="moz-txt-sig">-- 
1.8.5.5

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0002-Triple-Add-AMDHSA-operating-system-type.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 7e11a4bf46b839936124c4a24defcf557c51c64e Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Wed, 5 Nov 2014 11:50:40 -0500
Subject: [PATCH 2/5] Triple: Add AMDHSA operating system type

This operating system type represents the AMD HSA runtime,
and will be required by the R600 backend in order to generate
correct code for this runtime.
---
 include/llvm/ADT/Triple.h | 3 ++-
 lib/Support/Triple.cpp    | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index c1b7882..b46bce8 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -138,7 +138,8 @@ public:
     Bitrig,
     AIX,
     CUDA,       // NVIDIA CUDA
-    NVCL        // NVIDIA OpenCL
+    NVCL,       // NVIDIA OpenCL
+    AMDHSA      // AMD HSA Runtime
   };
   enum EnvironmentType {
     UnknownEnvironment,
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c9fc785..dbcebe8 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -157,6 +157,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case AIX: return "aix";
   case CUDA: return "cuda";
   case NVCL: return "nvcl";
+  case AMDHSA: return "amdhsa";
   }
 
   llvm_unreachable("Invalid OSType");
@@ -311,6 +312,7 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("aix", Triple::AIX)
     .StartsWith("cuda", Triple::CUDA)
     .StartsWith("nvcl", Triple::NVCL)
+    .StartsWith("amdhsa", Triple::AMDHSA)
     .Default(Triple::UnknownOS);
 }
 
<div class="moz-txt-sig">-- 
1.8.5.5

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0003-R600-SI-Set-the-ATC-bit-on-all-resource-descriptors-.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 2128345551c8830a1782536df28225ae2c9bb506 Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Mon, 3 Nov 2014 15:40:13 -0500
Subject: [PATCH 3/5] R600/SI: Set the ATC bit on all resource descriptors for
 the HSA runtime

---
 lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 7 +++++++
 lib/Target/R600/AMDGPUSubtarget.cpp    | 3 ++-
 lib/Target/R600/AMDGPUSubtarget.h      | 4 ++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index aa9062d..a680507 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -952,6 +952,13 @@ static SDValue buildSMovImm32(SelectionDAG *DAG, SDLoc DL, uint64_t Val) {
 static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
                          uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
 
+  const AMDGPUSubtarget &ST = DAG->getTarget().getSubtarget&lt;AMDGPUSubtarget&gt;();
+
+  if (ST.isAmdHsaOS()) {
+    // HSA requires the ATC bit to be set for all resource descriptors.
+    RsrcDword2And3 |= (1ULL << 56);
+  }
+</pre>
      </div>
    </blockquote>
    Should this constant go to an HSADefines header?<br>
    <br>
    <blockquote cite="mid:20141106222332.GA7255@freedesktop.org"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">
   SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
   SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
   if (RsrcDword1) {
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 9d09a19..0d693c8 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -84,7 +84,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
       FrameLowering(TargetFrameLowering::StackGrowsUp,
                     64 * 16, // Maximum stack alignment (long16)
                     0),
-      InstrItins(getInstrItineraryForCPU(GPU)) {
+      InstrItins(getInstrItineraryForCPU(GPU)),
+      TargetTriple(TT) {
   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     InstrInfo.reset(new R600InstrInfo(*this));
     TLInfo.reset(new R600TargetLowering(TM));
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 55a0c58..ff37932 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -70,6 +70,7 @@ private:
   std::unique_ptr&lt;AMDGPUTargetLowering&gt; TLInfo;
   std::unique_ptr&lt;AMDGPUInstrInfo&gt; InstrInfo;
   InstrItineraryData InstrItins;
+  Triple TargetTriple;
 
 public:
   AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
@@ -219,6 +220,9 @@ public:
   bool r600ALUEncoding() const {
     return R600ALUInst;
   }
+  bool isAmdHsaOS() const {
+    return TargetTriple.getOS() == Triple::AMDHSA;
+  }
 };
 
 } // End namespace llvm
<div class="moz-txt-sig">-- 
1.8.5.5

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0004-R600-SI-Move-more-information-into-SIProgramInfo-str.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From d67699d6c50a2eb2159239d2e431e11cc5d3e817 Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Wed, 22 Oct 2014 20:10:30 -0400
Subject: [PATCH 4/5] R600/SI: Move more information into SIProgramInfo struct

---
 lib/Target/R600/AMDGPUAsmPrinter.cpp | 85 ++++++++++++++++++------------------
 lib/Target/R600/AMDGPUAsmPrinter.h   | 25 +++++++++--
 2 files changed, 64 insertions(+), 46 deletions(-)

diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 5511d7c..aa29997 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -240,6 +240,8 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
 
 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                         const MachineFunction &MF) const {
+  const AMDGPUSubtarget &STM = TM.getSubtarget&lt;AMDGPUSubtarget&gt;();
+  const SIMachineFunctionInfo *MFI = MF.getInfo&lt;SIMachineFunctionInfo&gt;();
   uint64_t CodeSize = 0;
   unsigned MaxSGPR = 0;
   unsigned MaxVGPR = 0;
@@ -340,6 +342,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.NumVGPR = MaxVGPR + 1;
   ProgInfo.NumSGPR = MaxSGPR + 1;
 
+  ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
+  ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
   // register.
   ProgInfo.FloatMode = getFPMode(MF);
@@ -356,21 +360,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.FlatUsed = FlatUsed;
   ProgInfo.VCCUsed = VCCUsed;
   ProgInfo.CodeLen = CodeSize;
-}
 
-void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
-                                         const SIProgramInfo &KernelInfo) {
-  const AMDGPUSubtarget &STM = TM.getSubtarget&lt;AMDGPUSubtarget&gt;();
-  const SIMachineFunctionInfo *MFI = MF.getInfo&lt;SIMachineFunctionInfo&gt;();
-
-  unsigned RsrcReg;
-  switch (MFI->getShaderType()) {
-  default: // Fall through
-  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
-  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
-  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
-  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
-  }
+  ProgInfo.ComputePGMRSrc1 =
+      S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
+      S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
+      S_00B848_PRIORITY(ProgInfo.Priority) |
+      S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
+      S_00B848_PRIV(ProgInfo.Priv) |
+      S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
+      S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
+      S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
 
   unsigned LDSAlignShift;
   if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
@@ -384,58 +383,60 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
   unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
                           MFI->getMaximumWorkGroupSize(MF);
 
-  unsigned LDSBlocks =
-     RoundUpToAlignment(MFI->LDSSize + LDSSpillSize,
-                             1 << LDSAlignShift) >> LDSAlignShift;
+  ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
+  ProgInfo.LDSBlocks =
+     RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
 
   // Scratch is allocated in 256 dword blocks.
   unsigned ScratchAlignShift = 10;
   // We need to program the hardware with the amount of scratch memory that
-  // is used by the entire wave.  KernelInfo.ScratchSize is the amount of
+  // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
   // scratch memory used per thread.
-  unsigned ScratchBlocks =
-    RoundUpToAlignment(KernelInfo.ScratchSize * STM.getWavefrontSize(),
+  ProgInfo.ScratchBlocks =
+    RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
                        1 << ScratchAlignShift) >> ScratchAlignShift;
 
-  unsigned VGPRBlocks = (KernelInfo.NumVGPR - 1) / 4;
-  unsigned SGPRBlocks = (KernelInfo.NumSGPR - 1) / 8;
+  ProgInfo.ComputePGMRSrc2 =
+      S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
+      S_00B02C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0);
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
+                                         const SIProgramInfo &KernelInfo) {
+  const SIMachineFunctionInfo *MFI = MF.getInfo&lt;SIMachineFunctionInfo&gt;();
+
+  unsigned RsrcReg;
+  switch (MFI->getShaderType()) {
+  default: // Fall through
+  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
+  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
+  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
+  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
+  }
+
 
   if (MFI->getShaderType() == ShaderType::COMPUTE) {
     OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
 
-    const uint32_t ComputePGMRSrc1 =
-      S_00B848_VGPRS(VGPRBlocks) |
-      S_00B848_SGPRS(SGPRBlocks) |
-      S_00B848_PRIORITY(KernelInfo.Priority) |
-      S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
-      S_00B848_PRIV(KernelInfo.Priv) |
-      S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
-      S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
-      S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
-
-    OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
+    OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
 
     OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
-    const uint32_t ComputePGMRSrc2 =
-      S_00B84C_LDS_SIZE(LDSBlocks) |
-      S_00B02C_SCRATCH_EN(ScratchBlocks > 0);
-
-    OutStreamer.EmitIntValue(ComputePGMRSrc2, 4);
+    OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
 
     OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
-    OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
+    OutStreamer.EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
 
     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
     // 0" comment but I don't see a corresponding field in the register spec.
   } else {
     OutStreamer.EmitIntValue(RsrcReg, 4);
-    OutStreamer.EmitIntValue(S_00B028_VGPRS(VGPRBlocks) |
-                             S_00B028_SGPRS(SGPRBlocks), 4);
+    OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
+                             S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
   }
 
   if (MFI->getShaderType() == ShaderType::PIXEL) {
     OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
-    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
+    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
     OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
     OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
   }
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index b9a0767..61f86d6 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -24,8 +24,8 @@ class AMDGPUAsmPrinter : public AsmPrinter {
 private:
   struct SIProgramInfo {
     SIProgramInfo() :
-      NumVGPR(0),
-      NumSGPR(0),
+      VGPRBlocks(0),
+      SGPRBlocks(0),
       Priority(0),
       FloatMode(0),
       Priv(0),
@@ -33,13 +33,19 @@ private:
       DebugMode(0),
       IEEEMode(0),
       ScratchSize(0),
+      ComputePGMRSrc1(0),
+      LDSBlocks(0),
+      ScratchBlocks(0),
+      ComputePGMRSrc2(0),
+      NumVGPR(0),
+      NumSGPR(0),
       FlatUsed(false),
       VCCUsed(false),
       CodeLen(0) {}
 
     // Fields set in PGM_RSRC1 pm4 packet.
-    uint32_t NumVGPR;
-    uint32_t NumSGPR;
+    uint32_t VGPRBlocks;
+    uint32_t SGPRBlocks;
     uint32_t Priority;
     uint32_t FloatMode;
     uint32_t Priv;
@@ -48,6 +54,17 @@ private:
     uint32_t IEEEMode;
     uint32_t ScratchSize;
 
+    uint32_t ComputePGMRSrc1;
+
+    // Fields set in PGM_RSRC2 pm4 packet.
+    uint32_t LDSBlocks;
+    uint32_t ScratchBlocks;
+
+    uint32_t ComputePGMRSrc2;
+
+    uint32_t NumVGPR;
+    uint32_t NumSGPR;
+    uint32_t LDSSize;
     bool FlatUsed;
 
     // Bonus information for debugging.
<div class="moz-txt-sig">-- 
1.8.5.5

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0005-R600-SI-Emit-amd_kernel_code_t-header-for-AMDGPU-env.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-unicode">
        <pre wrap="">From b5947e39f6d07f7202cfdc6899fdbc3528f0e4cf Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Wed, 22 Oct 2014 20:11:40 -0400
Subject: [PATCH 5/5] R600/SI: Emit amd_kernel_code_t header for AMDGPU
 environment

---
 lib/Target/R600/AMDGPUAsmPrinter.cpp |  65 +++-
 lib/Target/R600/AMDGPUAsmPrinter.h   |   2 +
 lib/Target/R600/AMDGPUSubtarget.cpp  |   7 +
 lib/Target/R600/AMDGPUSubtarget.h    |   6 +
 lib/Target/R600/AMDKernelCodeT.h     | 692 +++++++++++++++++++++++++++++++++++
 5 files changed, 771 insertions(+), 1 deletion(-)
 create mode 100644 lib/Target/R600/AMDKernelCodeT.h

diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index aa29997..b78c5029 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -18,6 +18,7 @@
 
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600MachineFunctionInfo.h"
@@ -109,11 +110,17 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
                                               ELF::SHT_PROGBITS, 0,
                                               SectionKind::getReadOnly());
+
   OutStreamer.SwitchSection(ConfigSection);
 
   const AMDGPUSubtarget &STM = TM.getSubtarget&lt;AMDGPUSubtarget&gt;();
   SIProgramInfo KernelInfo;
-  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+  if (STM.isAMDGPUEnv()) {
+    OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+    getSIProgramInfo(KernelInfo, MF);
+    EmitAmdKernelCodeT(MF, KernelInfo);
+    OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
+  } else if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {</pre>
      </div>
    </blockquote>
    Can you change this to be >= SOUTHERN_ISLANDS to be more clear?<br>
    <blockquote cite="mid:20141106222332.GA7255@freedesktop.org"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-unicode">
        <pre wrap="">
     getSIProgramInfo(KernelInfo, MF);
     EmitProgramInfoSI(MF, KernelInfo);
   } else {
@@ -441,3 +448,59 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
     OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
   }
 }
+
+void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
+                                        const SIProgramInfo &KernelInfo) const {
+  const SIMachineFunctionInfo *MFI = MF.getInfo&lt;SIMachineFunctionInfo&gt;();
+  const AMDGPUSubtarget &STM = TM.getSubtarget&lt;AMDGPUSubtarget&gt;();
+  amd_kernel_code_t header;
+
+  memset(&header, 0, sizeof(header));
+
+  header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
+  header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
+
+  header.struct_byte_size = sizeof(amd_kernel_code_t);
+
+  header.target_chip = STM.getAmdKernelCodeChipID();
+
+  header.kernel_code_entry_byte_offset = (1 << MF.getAlignment());
+
+  header.compute_pgm_resource_registers =
+      KernelInfo.ComputePGMRSrc1 |
+      (((uint64_t)KernelInfo.ComputePGMRSrc2) << 32);
+
+  // Code Properties:
+  header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+                           AMD_CODE_PROPERTY_IS_PTR64;
+
+  if (KernelInfo.FlatUsed)
+    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+
+  if (KernelInfo.ScratchBlocks)
+    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+
+  header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+  header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+
+  // MFI->ABIArgOffset is the number of bytes for the kernel arguments
+  // plus 36.  36 is the number of bytes reserved at the begining of the
+  // input buffer to store work-group size information.
+  // FIXME: We should be adding the size of the implicit arguments
+  // to this value.
+  header.kernarg_segment_byte_size = MFI->ABIArgOffset;
+
+  header.wavefront_sgpr_count = KernelInfo.NumVGPR;
+  header.workitem_vgpr_count = KernelInfo.NumSGPR;
+
+  // FIXME: What values do I put for these alignments
+  header.kernarg_segment_alignment = 0;
+  header.group_segment_alignment = 0;
+  header.private_segment_alignment = 0;</pre>
      </div>
    </blockquote>
    According to the output of SC on a random kernel,<br>
    kernarg_segment_alignment                      = 3<br>
    group_segment_alignment                        = 3<br>
    private_segment_alignment                      = 4<br>
    <br>
    We should also probably add printing the same metadata to the text
    output.<br>
    <blockquote cite="mid:20141106222332.GA7255@freedesktop.org"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-unicode">
        <pre wrap="">
+
+  header.code_type = 1; // HSA_EXT_CODE_KERNEL
+
+  header.wavefront_size = STM.getWavefrontSize();
+
+  OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
+}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 61f86d6..5bfbf73 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -81,6 +81,8 @@ private:
   /// can correctly setup the GPU state.
   void EmitProgramInfoR600(const MachineFunction &MF);
   void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+  void EmitAmdKernelCodeT(const MachineFunction &MF,
+                          const SIProgramInfo &KernelInfo) const;
 
 public:
   explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 0d693c8..a805188 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -108,3 +108,10 @@ unsigned AMDGPUSubtarget::getStackEntrySize() const {
     llvm_unreachable("Illegal wavefront size.");
   }
 }
+
+unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
+  switch(getGeneration()) {
+  default: llvm_unreachable("ChipID unknown");
+  case SEA_ISLANDS: return 12;
+  }
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index ff37932..9fe8e3d 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -201,6 +201,12 @@ public:
     return LocalMemorySize;
   }
 
+  bool isAMDGPUEnv() const {
+    return TargetTriple.getEnvironment() == Triple::AMDGPU;
+  }
+
+  unsigned getAmdKernelCodeChipID() const;
+
   bool enableMachineScheduler() const override {
     return getGeneration() <= NORTHERN_ISLANDS;
   }

</pre>
      </div>
    </blockquote>
    <br>
  </body>
</html>