[clang] 9acb533 - [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (#95061)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 25 04:19:32 PDT 2024
Author: Alex Voicu
Date: 2024-06-25T12:19:28+01:00
New Revision: 9acb533c38be833ec1d8daa06e127a9de8f0a5ef
URL: https://github.com/llvm/llvm-project/commit/9acb533c38be833ec1d8daa06e127a9de8f0a5ef
DIFF: https://github.com/llvm/llvm-project/commit/9acb533c38be833ec1d8daa06e127a9de8f0a5ef.diff
LOG: [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (#95061)
This patch augments the HIPAMD driver to allow it to target AMDGCN
flavoured SPIR-V compilation. It's mostly straightforward, as we re-use
some of the existing SPIRV infra, however there are a few notable
additions:
- we introduce an `amdgcnspirv` offload arch, rather than relying on
using `generic` (this is already fairly overloaded) or simply using
`spirv` or `spirv64` (we'll want to use these to denote unflavoured
SPIRV, once we bring up that capability)
- initially it is won't be possible to mix-in SPIR-V and concrete AMDGPU
targets, as it would require some relatively intrusive surgery in the
HIPAMD Toolchain and the Driver to deal with two triples
(`spirv64-amd-amdhsa` and `amdgcn-amd-amdhsa`, respectively)
- in order to retain user provided compiler flags and have them
available at JIT time, we rely on embedding the command line via
`-fembed-bitcode=marker`, which the bitcode writer had previously not
implemented for SPIRV; we only allow it conditionally for AMDGCN
flavoured SPIRV, and it is handled correctly by the Translator (it ends
up as a string literal)
Once the SPIRV BE is no longer experimental we'll switch to using that
rather than the translator. There's some additional work that'll come
via a separate PR around correctly piping through AMDGCN's
implementation of `printf`, for now we merely handle its flags
correctly.
Added:
Modified:
clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Driver/ToolChains/HIPAMD.cpp
clang/lib/Driver/ToolChains/HIPAMD.h
clang/test/Driver/cuda-arch-translation.cu
clang/test/Frontend/embed-bitcode.ll
clang/test/Misc/target-invalid-cpu-note.c
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 0d5e38e825aa7..01cfe286c491b 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -128,6 +128,7 @@ enum class CudaArch {
GFX12_GENERIC,
GFX1200,
GFX1201,
+ AMDGCNSPIRV,
Generic, // A processor model named 'generic' if the target backend defines a
// public one.
LAST,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1d96a929f95d8..af99c4d61021e 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -148,6 +148,7 @@ static const CudaArchToStringMap arch_names[] = {
{CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
GFX(1200), // gfx1200
GFX(1201), // gfx1201
+ {CudaArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
{CudaArch::Generic, "generic", ""},
// clang-format on
};
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index ff7d2f1f92aa4..8e9006853db65 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -232,6 +232,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
+ case CudaArch::AMDGCNSPIRV:
case CudaArch::Generic:
case CudaArch::LAST:
break;
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 6e9a1bacd9bf5..6df34774334fa 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3541,6 +3541,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
+ case CudaArch::AMDGCNSPIRV:
case CudaArch::Generic:
case CudaArch::UNUSED:
case CudaArch::UNKNOWN:
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 76534475e88f7..652f519d82488 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -907,7 +907,8 @@ void CodeGenModule::Release() {
if (Context.getTargetInfo().getTriple().isWasm())
EmitMainVoidAlias();
- if (getTriple().isAMDGPU()) {
+ if (getTriple().isAMDGPU() ||
+ (getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) {
// Emit amdhsa_code_object_version module flag, which is code object version
// times 100.
if (getTarget().getTargetOpts().CodeObjectVersion !=
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 33ab7cc3f3968..6823f5424cef0 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -147,6 +147,14 @@ getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args,
static std::optional<llvm::Triple>
getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
if (!Args.hasArg(options::OPT_offload_EQ)) {
+ auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ);
+ if (llvm::find(OffloadArchs, "amdgcnspirv") != OffloadArchs.cend()) {
+ if (OffloadArchs.size() == 1)
+ return llvm::Triple("spirv64-amd-amdhsa");
+ // Mixing specific & SPIR-V compilation is not supported for now.
+ D.Diag(diag::err_drv_only_one_offload_target_supported);
+ return std::nullopt;
+ }
return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple.
}
auto TT = getOffloadTargetTriple(D, Args);
@@ -3231,10 +3239,14 @@ class OffloadingActionBuilder final {
// supported GPUs. sm_20 code should work correctly, if
// suboptimally, on all newer GPUs.
if (GpuArchList.empty()) {
- if (ToolChains.front()->getTriple().isSPIRV())
- GpuArchList.push_back(CudaArch::Generic);
- else
+ if (ToolChains.front()->getTriple().isSPIRV()) {
+ if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD)
+ GpuArchList.push_back(CudaArch::AMDGCNSPIRV);
+ else
+ GpuArchList.push_back(CudaArch::Generic);
+ } else {
GpuArchList.push_back(DefaultCudaArch);
+ }
}
return Error;
@@ -6501,9 +6513,11 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(
// things.
switch (TargetDeviceOffloadKind) {
case Action::OFK_HIP: {
- if (Target.getArch() == llvm::Triple::amdgcn &&
- Target.getVendor() == llvm::Triple::AMD &&
- Target.getOS() == llvm::Triple::AMDHSA)
+ if (((Target.getArch() == llvm::Triple::amdgcn ||
+ Target.getArch() == llvm::Triple::spirv64) &&
+ Target.getVendor() == llvm::Triple::AMD &&
+ Target.getOS() == llvm::Triple::AMDHSA) ||
+ !Args.hasArgNoClaim(options::OPT_offload_EQ))
TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target,
HostTC, Args);
else if (Target.getArch() == llvm::Triple::spirv64 &&
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 2ce9e2f4bcfcd..c0f6bc0c2e45a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4939,7 +4939,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
if (JA.isDeviceOffloading(Action::OFK_HIP) &&
- getToolChain().getTriple().isAMDGPU()) {
+ (getToolChain().getTriple().isAMDGPU() ||
+ (getToolChain().getTriple().isSPIRV() &&
+ getToolChain().getTriple().getVendor() == llvm::Triple::AMD))) {
// Device side compilation printf
if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
CmdArgs.push_back(Args.MakeArgString(
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 34236e8bcf949..c35b0febb262d 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -10,6 +10,7 @@
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "HIPUtility.h"
+#include "SPIRV.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Compilation.h"
@@ -193,6 +194,33 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
Lld, LldArgs, Inputs, Output));
}
+// For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode
+// and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It
+// calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will
+// be promoted from experimental, we will switch to using that. TODO: consider
+// if we want to run any targeted optimisations over IR here, over generic
+// SPIR-V.
+void AMDGCN::Linker::constructLinkAndEmitSpirvCommand(
+ Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
+ const InputInfo &Output, const llvm::opt::ArgList &Args) const {
+ assert(!Inputs.empty() && "Must have at least one input.");
+
+ constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
+
+ // Linked BC is now in Output
+
+ // Emit SPIR-V binary.
+ llvm::opt::ArgStringList TrArgs{
+ "--spirv-max-version=1.6",
+ "--spirv-ext=+all",
+ "--spirv-allow-extra-diexpressions",
+ "--spirv-allow-unknown-intrinsics",
+ "--spirv-lower-const-expr",
+ "--spirv-preserve-auxdata",
+ "--spirv-debug-info-version=nonsemantic-shader-200"};
+ SPIRV::constructTranslateCommand(C, *this, JA, Output, Output, TrArgs);
+}
+
// For amdgcn the inputs of the linker job are device bitcode and output is
// either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
// llc, then lld steps.
@@ -214,6 +242,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (JA.getType() == types::TY_LLVM_BC)
return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
+ if (getToolChain().getTriple().isSPIRV())
+ return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args);
+
return constructLldCommand(C, JA, Inputs, Output, Args);
}
@@ -270,6 +301,13 @@ void HIPAMDToolChain::addClangTargetOptions(
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
+ // For SPIR-V we embed the command-line into the generated binary, in order to
+ // retrieve it at JIT time and be able to do target specific compilation with
+ // options that match the user-supplied ones.
+ if (getTriple().isSPIRV() &&
+ !DriverArgs.hasArg(options::OPT_fembed_bitcode_marker))
+ CC1Args.push_back("-fembed-bitcode=marker");
+
for (auto BCFile : getDeviceLibs(DriverArgs)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
@@ -303,7 +341,8 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
}
Tool *HIPAMDToolChain::buildLinker() const {
- assert(getTriple().getArch() == llvm::Triple::amdgcn);
+ assert(getTriple().getArch() == llvm::Triple::amdgcn ||
+ getTriple().getArch() == llvm::Triple::spirv64);
return new tools::AMDGCN::Linker(*this);
}
@@ -358,7 +397,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
- if (DriverArgs.hasArg(options::OPT_nogpulib))
+ if (DriverArgs.hasArg(options::OPT_nogpulib) ||
+ (getTriple().getArch() == llvm::Triple::spirv64 &&
+ getTriple().getVendor() == llvm::Triple::AMD))
return {};
ArgStringList LibraryPaths;
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
index d81a9733014cc..c31894e22c5c8 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -40,6 +40,10 @@ class LLVM_LIBRARY_VISIBILITY Linker final : public Tool {
const InputInfoList &Inputs,
const InputInfo &Output,
const llvm::opt::ArgList &Args) const;
+ void constructLinkAndEmitSpirvCommand(Compilation &C, const JobAction &JA,
+ const InputInfoList &Inputs,
+ const InputInfo &Output,
+ const llvm::opt::ArgList &Args) const;
};
} // end namespace AMDGCN
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index ff97f2dbda6c5..e96191cc9d418 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -59,6 +59,8 @@
// RUN: | FileCheck -check-prefixes=HIP,GFX900 %s
// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=gfx902 -nogpuinc -nogpulib %s 2>&1 \
// RUN: | FileCheck -check-prefixes=HIP,GFX902 %s
+// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=amdgcnspirv -nogpuinc -nogpulib %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=HIP,SPIRV %s
// CUDA: ptxas
// CUDA-SAME: -m64
@@ -95,3 +97,4 @@
// GFX810:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx810
// GFX900:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx900
// GFX902:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx902
+// SPIRV:-targets=host-x86_64-unknown-linux,hip-spirv64-amd-amdhsa--amdgcnspirv
diff --git a/clang/test/Frontend/embed-bitcode.ll b/clang/test/Frontend/embed-bitcode.ll
index 9b8632d04dd98..0959af48ad24a 100644
--- a/clang/test/Frontend/embed-bitcode.ll
+++ b/clang/test/Frontend/embed-bitcode.ll
@@ -10,6 +10,9 @@
; RUN: %clang_cc1 -triple aarch64 -emit-llvm \
; RUN: -fembed-bitcode=all -x ir %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-ELF
+; RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm \
+; RUN: -fembed-bitcode=all -x ir %s -o - \
+; RUN: | FileCheck %s -check-prefix=CHECK-ELF
; check .bc input
; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 1a9063ee5a257..700860378ed0c 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index ba16c0851e1fd..7a228fb6c08b9 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -5309,6 +5309,8 @@ static const char *getSectionNameForBitcode(const Triple &T) {
llvm_unreachable("GOFF is not yet implemented");
break;
case Triple::SPIRV:
+ if (T.getVendor() == Triple::AMD)
+ return ".llvmbc";
llvm_unreachable("SPIRV is not yet implemented");
break;
case Triple::XCOFF:
@@ -5334,6 +5336,8 @@ static const char *getSectionNameForCommandline(const Triple &T) {
llvm_unreachable("GOFF is not yet implemented");
break;
case Triple::SPIRV:
+ if (T.getVendor() == Triple::AMD)
+ return ".llvmcmd";
llvm_unreachable("SPIRV is not yet implemented");
break;
case Triple::XCOFF:
More information about the cfe-commits
mailing list