PATCHES: R600/SI: CodeGen patches for HSA Runtime
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Nov 6 15:01:53 PST 2014
On 11/06/2014 02:23 PM, Tom Stellard wrote:
> Hi,
>
> Attached are patches to enable the R600 backend to emit code for the HSA runtime.
> Please review.
>
> -Tom
>
> 0001-Triple-Add-AMDGPU-evironment-type.patch
>
>
> From 01af45d2e2f79add5047e53738e6e4eff77354b0 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Wed, 22 Oct 2014 20:07:50 -0400
> Subject: [PATCH 1/5] Triple: Add AMDGPU evironment type
>
> This will be used to tell the R600 backend how to emit program data
> in its object files.
> ---
> include/llvm/ADT/Triple.h | 1 +
> lib/Support/Triple.cpp | 2 ++
> 2 files changed, 3 insertions(+)
>
> diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
> index 4432390..c1b7882 100644
> --- a/include/llvm/ADT/Triple.h
> +++ b/include/llvm/ADT/Triple.h
> @@ -155,6 +155,7 @@ public:
> MSVC,
> Itanium,
> Cygnus,
> + AMDGPU
> };
> enum ObjectFormatType {
> UnknownObjectFormat,
> diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
> index 7a9dc39..c9fc785 100644
> --- a/lib/Support/Triple.cpp
> +++ b/lib/Support/Triple.cpp
> @@ -176,6 +176,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
> case MSVC: return "msvc";
> case Itanium: return "itanium";
> case Cygnus: return "cygnus";
> + case AMDGPU: return "amdgpu";
> }
>
> llvm_unreachable("Invalid EnvironmentType!");
> @@ -326,6 +327,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
> .StartsWith("msvc", Triple::MSVC)
> .StartsWith("itanium", Triple::Itanium)
> .StartsWith("cygnus", Triple::Cygnus)
> + .StartsWith("amdgpu", Triple::AMDGPU)
> .Default(Triple::UnknownEnvironment);
> }
>
> -- 1.8.5.5
>
> 0002-Triple-Add-AMDHSA-operating-system-type.patch
>
>
> From 7e11a4bf46b839936124c4a24defcf557c51c64e Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Wed, 5 Nov 2014 11:50:40 -0500
> Subject: [PATCH 2/5] Triple: Add AMDHSA operating system type
>
> This operating system type represents the AMD HSA runtime,
> and will be required by the R600 backend in order to generate
> correct code for this runtime.
> ---
> include/llvm/ADT/Triple.h | 3 ++-
> lib/Support/Triple.cpp | 2 ++
> 2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
> index c1b7882..b46bce8 100644
> --- a/include/llvm/ADT/Triple.h
> +++ b/include/llvm/ADT/Triple.h
> @@ -138,7 +138,8 @@ public:
> Bitrig,
> AIX,
> CUDA, // NVIDIA CUDA
> - NVCL // NVIDIA OpenCL
> + NVCL, // NVIDIA OpenCL
> + AMDHSA // AMD HSA Runtime
> };
> enum EnvironmentType {
> UnknownEnvironment,
> diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
> index c9fc785..dbcebe8 100644
> --- a/lib/Support/Triple.cpp
> +++ b/lib/Support/Triple.cpp
> @@ -157,6 +157,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
> case AIX: return "aix";
> case CUDA: return "cuda";
> case NVCL: return "nvcl";
> + case AMDHSA: return "amdhsa";
> }
>
> llvm_unreachable("Invalid OSType");
> @@ -311,6 +312,7 @@ static Triple::OSType parseOS(StringRef OSName) {
> .StartsWith("aix", Triple::AIX)
> .StartsWith("cuda", Triple::CUDA)
> .StartsWith("nvcl", Triple::NVCL)
> + .StartsWith("amdhsa", Triple::AMDHSA)
> .Default(Triple::UnknownOS);
> }
>
> -- 1.8.5.5
>
> 0003-R600-SI-Set-the-ATC-bit-on-all-resource-descriptors-.patch
>
>
> From 2128345551c8830a1782536df28225ae2c9bb506 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Mon, 3 Nov 2014 15:40:13 -0500
> Subject: [PATCH 3/5] R600/SI: Set the ATC bit on all resource descriptors for
> the HSA runtime
>
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 7 +++++++
> lib/Target/R600/AMDGPUSubtarget.cpp | 3 ++-
> lib/Target/R600/AMDGPUSubtarget.h | 4 ++++
> 3 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index aa9062d..a680507 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -952,6 +952,13 @@ static SDValue buildSMovImm32(SelectionDAG *DAG, SDLoc DL, uint64_t Val) {
> static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
> uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
>
> + const AMDGPUSubtarget &ST = DAG->getTarget().getSubtarget<AMDGPUSubtarget>();
> +
> + if (ST.isAmdHsaOS()) {
> + // HSA requires the ATC bit to be set for all resource descriptors.
> + RsrcDword2And3 |= (1ULL << 56);
> + }
> +
Should this constant (the ATC bit, 1ULL << 56) be given a name and moved into an HSADefines header?
> SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
> SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
> if (RsrcDword1) {
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 9d09a19..0d693c8 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -84,7 +84,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
> FrameLowering(TargetFrameLowering::StackGrowsUp,
> 64 * 16, // Maximum stack alignment (long16)
> 0),
> - InstrItins(getInstrItineraryForCPU(GPU)) {
> + InstrItins(getInstrItineraryForCPU(GPU)),
> + TargetTriple(TT) {
> if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> InstrInfo.reset(new R600InstrInfo(*this));
> TLInfo.reset(new R600TargetLowering(TM));
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 55a0c58..ff37932 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -70,6 +70,7 @@ private:
> std::unique_ptr<AMDGPUTargetLowering> TLInfo;
> std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
> InstrItineraryData InstrItins;
> + Triple TargetTriple;
>
> public:
> AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
> @@ -219,6 +220,9 @@ public:
> bool r600ALUEncoding() const {
> return R600ALUInst;
> }
> + bool isAmdHsaOS() const {
> + return TargetTriple.getOS() == Triple::AMDHSA;
> + }
> };
>
> } // End namespace llvm
> -- 1.8.5.5
>
> 0004-R600-SI-Move-more-information-into-SIProgramInfo-str.patch
>
>
> From d67699d6c50a2eb2159239d2e431e11cc5d3e817 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Wed, 22 Oct 2014 20:10:30 -0400
> Subject: [PATCH 4/5] R600/SI: Move more information into SIProgramInfo struct
>
> ---
> lib/Target/R600/AMDGPUAsmPrinter.cpp | 85 ++++++++++++++++++------------------
> lib/Target/R600/AMDGPUAsmPrinter.h | 25 +++++++++--
> 2 files changed, 64 insertions(+), 46 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index 5511d7c..aa29997 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -240,6 +240,8 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
>
> void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
> const MachineFunction &MF) const {
> + const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> uint64_t CodeSize = 0;
> unsigned MaxSGPR = 0;
> unsigned MaxVGPR = 0;
> @@ -340,6 +342,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
> ProgInfo.NumVGPR = MaxVGPR + 1;
> ProgInfo.NumSGPR = MaxSGPR + 1;
>
> + ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
> + ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
> // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
> // register.
> ProgInfo.FloatMode = getFPMode(MF);
> @@ -356,21 +360,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
> ProgInfo.FlatUsed = FlatUsed;
> ProgInfo.VCCUsed = VCCUsed;
> ProgInfo.CodeLen = CodeSize;
> -}
>
> -void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
> - const SIProgramInfo &KernelInfo) {
> - const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
> - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> -
> - unsigned RsrcReg;
> - switch (MFI->getShaderType()) {
> - default: // Fall through
> - case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
> - case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
> - case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
> - case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
> - }
> + ProgInfo.ComputePGMRSrc1 =
> + S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
> + S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
> + S_00B848_PRIORITY(ProgInfo.Priority) |
> + S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
> + S_00B848_PRIV(ProgInfo.Priv) |
> + S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
> + S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
> + S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
>
> unsigned LDSAlignShift;
> if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
> @@ -384,58 +383,60 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
> unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
> MFI->getMaximumWorkGroupSize(MF);
>
> - unsigned LDSBlocks =
> - RoundUpToAlignment(MFI->LDSSize + LDSSpillSize,
> - 1 << LDSAlignShift) >> LDSAlignShift;
> + ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
> + ProgInfo.LDSBlocks =
> + RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
>
> // Scratch is allocated in 256 dword blocks.
> unsigned ScratchAlignShift = 10;
> // We need to program the hardware with the amount of scratch memory that
> - // is used by the entire wave. KernelInfo.ScratchSize is the amount of
> + // is used by the entire wave. ProgInfo.ScratchSize is the amount of
> // scratch memory used per thread.
> - unsigned ScratchBlocks =
> - RoundUpToAlignment(KernelInfo.ScratchSize * STM.getWavefrontSize(),
> + ProgInfo.ScratchBlocks =
> + RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
> 1 << ScratchAlignShift) >> ScratchAlignShift;
>
> - unsigned VGPRBlocks = (KernelInfo.NumVGPR - 1) / 4;
> - unsigned SGPRBlocks = (KernelInfo.NumSGPR - 1) / 8;
> + ProgInfo.ComputePGMRSrc2 =
> + S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
> + S_00B02C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0);
> +}
> +
> +void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
> + const SIProgramInfo &KernelInfo) {
> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> +
> + unsigned RsrcReg;
> + switch (MFI->getShaderType()) {
> + default: // Fall through
> + case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
> + case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
> + case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
> + case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
> + }
> +
>
> if (MFI->getShaderType() == ShaderType::COMPUTE) {
> OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
>
> - const uint32_t ComputePGMRSrc1 =
> - S_00B848_VGPRS(VGPRBlocks) |
> - S_00B848_SGPRS(SGPRBlocks) |
> - S_00B848_PRIORITY(KernelInfo.Priority) |
> - S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
> - S_00B848_PRIV(KernelInfo.Priv) |
> - S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
> - S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
> - S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
> -
> - OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
> + OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
>
> OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
> - const uint32_t ComputePGMRSrc2 =
> - S_00B84C_LDS_SIZE(LDSBlocks) |
> - S_00B02C_SCRATCH_EN(ScratchBlocks > 0);
> -
> - OutStreamer.EmitIntValue(ComputePGMRSrc2, 4);
> + OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
>
> OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
> - OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
> + OutStreamer.EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
>
> // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
> // 0" comment but I don't see a corresponding field in the register spec.
> } else {
> OutStreamer.EmitIntValue(RsrcReg, 4);
> - OutStreamer.EmitIntValue(S_00B028_VGPRS(VGPRBlocks) |
> - S_00B028_SGPRS(SGPRBlocks), 4);
> + OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
> + S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
> }
>
> if (MFI->getShaderType() == ShaderType::PIXEL) {
> OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
> - OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
> + OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
> OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
> OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
> }
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
> index b9a0767..61f86d6 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.h
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.h
> @@ -24,8 +24,8 @@ class AMDGPUAsmPrinter : public AsmPrinter {
> private:
> struct SIProgramInfo {
> SIProgramInfo() :
> - NumVGPR(0),
> - NumSGPR(0),
> + VGPRBlocks(0),
> + SGPRBlocks(0),
> Priority(0),
> FloatMode(0),
> Priv(0),
> @@ -33,13 +33,19 @@ private:
> DebugMode(0),
> IEEEMode(0),
> ScratchSize(0),
> + ComputePGMRSrc1(0),
> + LDSBlocks(0),
> + ScratchBlocks(0),
> + ComputePGMRSrc2(0),
> + NumVGPR(0),
> + NumSGPR(0),
> FlatUsed(false),
> VCCUsed(false),
> CodeLen(0) {}
>
> // Fields set in PGM_RSRC1 pm4 packet.
> - uint32_t NumVGPR;
> - uint32_t NumSGPR;
> + uint32_t VGPRBlocks;
> + uint32_t SGPRBlocks;
> uint32_t Priority;
> uint32_t FloatMode;
> uint32_t Priv;
> @@ -48,6 +54,17 @@ private:
> uint32_t IEEEMode;
> uint32_t ScratchSize;
>
> + uint32_t ComputePGMRSrc1;
> +
> + // Fields set in PGM_RSRC2 pm4 packet.
> + uint32_t LDSBlocks;
> + uint32_t ScratchBlocks;
> +
> + uint32_t ComputePGMRSrc2;
> +
> + uint32_t NumVGPR;
> + uint32_t NumSGPR;
> + uint32_t LDSSize;
> bool FlatUsed;
>
> // Bonus information for debugging.
> -- 1.8.5.5
>
> 0005-R600-SI-Emit-amd_kernel_code_t-header-for-AMDGPU-env.patch
>
>
> From b5947e39f6d07f7202cfdc6899fdbc3528f0e4cf Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Wed, 22 Oct 2014 20:11:40 -0400
> Subject: [PATCH 5/5] R600/SI: Emit amd_kernel_code_t header for AMDGPU
> environment
>
> ---
> lib/Target/R600/AMDGPUAsmPrinter.cpp | 65 +++-
> lib/Target/R600/AMDGPUAsmPrinter.h | 2 +
> lib/Target/R600/AMDGPUSubtarget.cpp | 7 +
> lib/Target/R600/AMDGPUSubtarget.h | 6 +
> lib/Target/R600/AMDKernelCodeT.h | 692 +++++++++++++++++++++++++++++++++++
> 5 files changed, 771 insertions(+), 1 deletion(-)
> create mode 100644 lib/Target/R600/AMDKernelCodeT.h
>
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index aa29997..b78c5029 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -18,6 +18,7 @@
>
> #include "AMDGPUAsmPrinter.h"
> #include "AMDGPU.h"
> +#include "AMDKernelCodeT.h"
> #include "AMDGPUSubtarget.h"
> #include "R600Defines.h"
> #include "R600MachineFunctionInfo.h"
> @@ -109,11 +110,17 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
> const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
> ELF::SHT_PROGBITS, 0,
> SectionKind::getReadOnly());
> +
> OutStreamer.SwitchSection(ConfigSection);
>
> const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
> SIProgramInfo KernelInfo;
> - if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
> + if (STM.isAMDGPUEnv()) {
> + OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
> + getSIProgramInfo(KernelInfo, MF);
> + EmitAmdKernelCodeT(MF, KernelInfo);
> + OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
> + } else if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
Can you change this to >= SOUTHERN_ISLANDS to make it clearer?
> getSIProgramInfo(KernelInfo, MF);
> EmitProgramInfoSI(MF, KernelInfo);
> } else {
> @@ -441,3 +448,59 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
> OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
> }
> }
> +
> +void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
> + const SIProgramInfo &KernelInfo) const {
> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> + const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
> + amd_kernel_code_t header;
> +
> + memset(&header, 0, sizeof(header));
> +
> + header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
> + header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
> +
> + header.struct_byte_size = sizeof(amd_kernel_code_t);
> +
> + header.target_chip = STM.getAmdKernelCodeChipID();
> +
> + header.kernel_code_entry_byte_offset = (1 << MF.getAlignment());
> +
> + header.compute_pgm_resource_registers =
> + KernelInfo.ComputePGMRSrc1 |
> + (((uint64_t)KernelInfo.ComputePGMRSrc2) << 32);
> +
> + // Code Properties:
> + header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
> + AMD_CODE_PROPERTY_IS_PTR64;
> +
> + if (KernelInfo.FlatUsed)
> + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
> +
> + if (KernelInfo.ScratchBlocks)
> + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
> +
> + header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
> + header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
> +
> + // MFI->ABIArgOffset is the number of bytes for the kernel arguments
> + // plus 36. 36 is the number of bytes reserved at the beginning of the
> + // input buffer to store work-group size information.
> + // FIXME: We should be adding the size of the implicit arguments
> + // to this value.
> + header.kernarg_segment_byte_size = MFI->ABIArgOffset;
> +
> + header.wavefront_sgpr_count = KernelInfo.NumVGPR;
> + header.workitem_vgpr_count = KernelInfo.NumSGPR;
> +
> + // FIXME: What values do I put for these alignments
> + header.kernarg_segment_alignment = 0;
> + header.group_segment_alignment = 0;
> + header.private_segment_alignment = 0;
According to the output of SC on a random kernel, the values are:
kernarg_segment_alignment = 3
group_segment_alignment = 3
private_segment_alignment = 4
We should probably also print the same metadata in the text output.
> +
> + header.code_type = 1; // HSA_EXT_CODE_KERNEL
> +
> + header.wavefront_size = STM.getWavefrontSize();
> +
> + OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
> +}
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
> index 61f86d6..5bfbf73 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.h
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.h
> @@ -81,6 +81,8 @@ private:
> /// can correctly setup the GPU state.
> void EmitProgramInfoR600(const MachineFunction &MF);
> void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
> + void EmitAmdKernelCodeT(const MachineFunction &MF,
> + const SIProgramInfo &KernelInfo) const;
>
> public:
> explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 0d693c8..a805188 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -108,3 +108,10 @@ unsigned AMDGPUSubtarget::getStackEntrySize() const {
> llvm_unreachable("Illegal wavefront size.");
> }
> }
> +
> +unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
> + switch(getGeneration()) {
> + default: llvm_unreachable("ChipID unknown");
> + case SEA_ISLANDS: return 12;
> + }
> +}
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index ff37932..9fe8e3d 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -201,6 +201,12 @@ public:
> return LocalMemorySize;
> }
>
> + bool isAMDGPUEnv() const {
> + return TargetTriple.getEnvironment() == Triple::AMDGPU;
> + }
> +
> + unsigned getAmdKernelCodeChipID() const;
> +
> bool enableMachineScheduler() const override {
> return getGeneration() <= NORTHERN_ISLANDS;
> }
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141106/31bbe86f/attachment.html>
More information about the llvm-commits mailing list