[clang] 9a40858 - [HIP][Clang][Driver] Add Driver support for `hipstdpar`
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 3 05:15:00 PDT 2023
Author: Alex Voicu
Date: 2023-10-03T13:14:46+01:00
New Revision: 9a408588d1b8b7899eff593c537de539a4a12651
URL: https://github.com/llvm/llvm-project/commit/9a408588d1b8b7899eff593c537de539a4a12651
DIFF: https://github.com/llvm/llvm-project/commit/9a408588d1b8b7899eff593c537de539a4a12651.diff
LOG: [HIP][Clang][Driver] Add Driver support for `hipstdpar`
This patch adds the Driver changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. What this change does can be summed up as follows:
- add two flags: `--hipstdpar`, which enables `hipstdpar` compilation, and `--hipstdpar-interpose-alloc`, which enables the optional allocation interposition mode;
- the flags correspond to the new LangOptions members `HIPStdPar` and `HIPStdParInterposeAlloc`;
- if we are compiling or linking with --hipstdpar, we enable HIP; in the compilation case C and C++ inputs are treated as HIP inputs;
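- the ROCm / AMDGPU driver is augmented to look for and implicitly include an implementation-detail forwarding header (`hipstdpar_lib.hpp`, located via `--hipstdpar-path`); we error out if the user requested `hipstdpar` but the header or its dependencies (rocThrust and rocPrim, located via `--hipstdpar-thrust-path` / `--hipstdpar-prim-path` or the ROCm include path) cannot be found.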
Tests for the behaviour described above are also added.
Reviewed by: MaskRay, yaxunl
Differential Revision: https://reviews.llvm.org/D155775
Added:
clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp
clang/test/Driver/Inputs/hipstdpar/rocprim/.keep
clang/test/Driver/Inputs/hipstdpar/thrust/.keep
clang/test/Driver/hipstdpar.c
Modified:
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Basic/LangOptions.def
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Driver/ToolChains/HIPAMD.cpp
clang/lib/Driver/ToolChains/ROCm.h
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2a48c063e243ee0..91a95def4f80de4 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -70,6 +70,16 @@ def err_drv_no_rocm_device_lib : Error<
def err_drv_no_hip_runtime : Error<
"cannot find HIP runtime; provide its path via '--rocm-path', or pass "
"'-nogpuinc' to build without HIP runtime">;
+def err_drv_no_hipstdpar_lib : Error<
+ "cannot find HIP Standard Parallelism Acceleration library; provide it via "
+ "'--hipstdpar-path'">;
+def err_drv_no_hipstdpar_thrust_lib : Error<
+ "cannot find rocThrust, which is required by the HIP Standard Parallelism "
+ "Acceleration library; provide it via "
+ "'--hipstdpar-thrust-path'">;
+def err_drv_no_hipstdpar_prim_lib : Error<
+ "cannot find rocPrim, which is required by the HIP Standard Parallelism "
+ "Acceleration library; provide it via '--hipstdpar-prim-path'">;
def err_drv_no_hipspv_device_lib : Error<
"cannot find HIP device library%select{| for %1}0; provide its path via "
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 28c9bcec3ee60f1..c0ea4ecb9806a5b 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -280,6 +280,8 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")
+LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)")
+LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)")
LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ee4e23f335e7875..ff2130c93f28ea0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1258,6 +1258,32 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<hip_Group>,
HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
def hip_path_EQ : Joined<["--"], "hip-path=">, Group<hip_Group>,
HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
+def hipstdpar : Flag<["--"], "hipstdpar">,
+ Visibility<[ClangOption, CC1Option]>,
+ Group<CompileOnly_Group>,
+ HelpText<"Enable HIP acceleration for standard parallel algorithms">,
+ MarshallingInfoFlag<LangOpts<"HIPStdPar">>;
+def hipstdpar_interpose_alloc : Flag<["--"], "hipstdpar-interpose-alloc">,
+ Visibility<[ClangOption, CC1Option]>,
+ Group<CompileOnly_Group>,
+ HelpText<"Replace all memory allocation / deallocation calls with "
+ "hipManagedMalloc / hipFree equivalents">,
+ MarshallingInfoFlag<LangOpts<"HIPStdParInterposeAlloc">>;
+// TODO: use MarshallingInfo here
+def hipstdpar_path_EQ : Joined<["--"], "hipstdpar-path=">, Group<i_Group>,
+ HelpText<
+ "HIP Standard Parallel Algorithm Acceleration library path, used for "
+ "finding and implicitly including the library header">;
+def hipstdpar_thrust_path_EQ : Joined<["--"], "hipstdpar-thrust-path=">,
+ Group<i_Group>,
+ HelpText<
+ "rocThrust path, required by the HIP Standard Parallel Algorithm "
+ "Acceleration library, used to implicitly include the rocThrust library">;
+def hipstdpar_prim_path_EQ : Joined<["--"], "hipstdpar-prim-path=">,
+ Group<i_Group>,
+ HelpText<
+ "rocPrim path, required by the HIP Standard Parallel Algorithm "
+ "Acceleration library, used to implicitly include the rocPrim library">;
def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group<hip_Group>,
HelpText<"ROCm device library path. Alternative to rocm-path.">;
def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 84b8fc7685fed42..77328e1f99e5021 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -767,7 +767,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
[](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isHIP(I.first);
}) ||
- C.getInputArgs().hasArg(options::OPT_hip_link);
+ C.getInputArgs().hasArg(options::OPT_hip_link) ||
+ C.getInputArgs().hasArg(options::OPT_hipstdpar);
if (IsCuda && IsHIP) {
Diag(clang::diag::err_drv_mix_cuda_hip);
return;
@@ -2705,6 +2706,10 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
}
}
+ if ((Ty == types::TY_C || Ty == types::TY_CXX) &&
+ Args.hasArgNoClaim(options::OPT_hipstdpar))
+ Ty = types::TY_HIP;
+
if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
Inputs.push_back(std::make_pair(Ty, A));
@@ -3915,6 +3920,11 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link) {
+ if (Args.hasArgNoClaim(options::OPT_hipstdpar)) {
+ Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link));
+ Args.AddFlagArg(nullptr,
+ getOpts().getOption(options::OPT_frtlib_add_rpath));
+ }
// Emitting LLVM while linking disabled except in HIPAMD Toolchain
if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
Diag(clang::diag::err_drv_emit_llvm_link);
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 3f08c0ef5d6f000..d4b33ad551c433e 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -329,6 +329,20 @@ RocmInstallationDetector::RocmInstallationDetector(
RocmDeviceLibPathArg =
Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
+ HIPStdParPathArg =
+ Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ);
+ HasHIPStdParLibrary =
+ !HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg +
+ "/hipstdpar_lib.hpp");
+ HIPRocThrustPathArg =
+ Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ);
+ HasRocThrustLibrary = !HIPRocThrustPathArg.empty() &&
+ D.getVFS().exists(HIPRocThrustPathArg + "/thrust");
+ HIPRocPrimPathArg =
+ Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ);
+ HasRocPrimLibrary = !HIPRocPrimPathArg.empty() &&
+ D.getVFS().exists(HIPRocPrimPathArg + "/rocprim");
+
if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
HIPVersionArg = A->getValue();
unsigned Major = ~0U;
@@ -507,6 +521,7 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
!DriverArgs.hasArg(options::OPT_nohipwrapperinc);
+ bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar);
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// HIP header includes standard library wrapper headers under clang
@@ -529,8 +544,45 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
CC1Args.push_back(DriverArgs.MakeArgString(P));
}
- if (DriverArgs.hasArg(options::OPT_nogpuinc))
+ const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() {
+ if (!hasHIPStdParLibrary()) {
+ D.Diag(diag::err_drv_no_hipstdpar_lib);
+ return;
+ }
+ if (!HasRocThrustLibrary &&
+ !D.getVFS().exists(getIncludePath() + "/thrust")) {
+ D.Diag(diag::err_drv_no_hipstdpar_thrust_lib);
+ return;
+ }
+ if (!HasRocPrimLibrary &&
+ !D.getVFS().exists(getIncludePath() + "/rocprim")) {
+ D.Diag(diag::err_drv_no_hipstdpar_prim_lib);
+ return;
+ }
+
+ const char *ThrustPath;
+ if (HasRocThrustLibrary)
+ ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg);
+ else
+ ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust");
+
+ const char *PrimPath;
+ if (HasRocPrimLibrary)
+ PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg);
+ else
+ PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim");
+
+ CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath,
+ "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg),
+ "-include", "hipstdpar_lib.hpp"});
+ };
+
+ if (DriverArgs.hasArg(options::OPT_nogpuinc)) {
+ if (HasHipStdPar)
+ HandleHipStdPar();
+
return;
+ }
if (!hasHIPRuntime()) {
D.Diag(diag::err_drv_no_hip_runtime);
@@ -541,6 +593,8 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
if (UsesRuntimeWrapper)
CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
+ if (HasHipStdPar)
+ HandleHipStdPar();
}
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a694d00b569a590..129adfb9fcc74d1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6580,6 +6580,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fhip-new-launch-api");
Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init,
options::OPT_fno_gpu_allow_device_init);
+ Args.AddLastArg(CmdArgs, options::OPT_hipstdpar);
+ Args.AddLastArg(CmdArgs, options::OPT_hipstdpar_interpose_alloc);
Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name,
options::OPT_fno_hip_kernel_arg_name);
}
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 3fc5669c06c3994..ccb36a6c846c806 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -113,6 +113,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
"--no-undefined",
"-shared",
"-plugin-opt=-amdgpu-internalize-symbols"};
+ if (Args.hasArg(options::OPT_hipstdpar))
+ LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");
auto &TC = getToolChain();
auto &D = TC.getDriver();
@@ -242,6 +244,8 @@ void HIPAMDToolChain::addClangTargetOptions(
if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
+ if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
+ CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
StringRef MaxThreadsPerBlock =
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h
index 554d8a6929ac592..dceb0ab03669332 100644
--- a/clang/lib/Driver/ToolChains/ROCm.h
+++ b/clang/lib/Driver/ToolChains/ROCm.h
@@ -77,6 +77,9 @@ class RocmInstallationDetector {
const Driver &D;
bool HasHIPRuntime = false;
bool HasDeviceLibrary = false;
+ bool HasHIPStdParLibrary = false;
+ bool HasRocThrustLibrary = false;
+ bool HasRocPrimLibrary = false;
// Default version if not detected or specified.
const unsigned DefaultVersionMajor = 3;
@@ -96,6 +99,13 @@ class RocmInstallationDetector {
std::vector<std::string> RocmDeviceLibPathArg;
// HIP runtime path specified by --hip-path.
StringRef HIPPathArg;
+ // HIP Standard Parallel Algorithm acceleration library specified by
+ // --hipstdpar-path
+ StringRef HIPStdParPathArg;
+ // rocThrust algorithm library specified by --hipstdpar-thrust-path
+ StringRef HIPRocThrustPathArg;
+ // rocPrim algorithm library specified by --hipstdpar-prim-path
+ StringRef HIPRocPrimPathArg;
// HIP version specified by --hip-version.
StringRef HIPVersionArg;
// Wheter -nogpulib is specified.
@@ -180,6 +190,9 @@ class RocmInstallationDetector {
/// Check whether we detected a valid ROCm device library.
bool hasDeviceLibrary() const { return HasDeviceLibrary; }
+ /// Check whether we detected a valid HIP STDPAR Acceleration library.
+ bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; }
+
/// Print information about the detected ROCm installation.
void print(raw_ostream &OS) const;
diff --git a/clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp b/clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp
new file mode 100644
index 000000000000000..e69de29bb2d1d64
diff --git a/clang/test/Driver/Inputs/hipstdpar/rocprim/.keep b/clang/test/Driver/Inputs/hipstdpar/rocprim/.keep
new file mode 100644
index 000000000000000..e69de29bb2d1d64
diff --git a/clang/test/Driver/Inputs/hipstdpar/thrust/.keep b/clang/test/Driver/Inputs/hipstdpar/thrust/.keep
new file mode 100644
index 000000000000000..e69de29bb2d1d64
diff --git a/clang/test/Driver/hipstdpar.c b/clang/test/Driver/hipstdpar.c
new file mode 100644
index 000000000000000..b4fd815d9a76482
--- /dev/null
+++ b/clang/test/Driver/hipstdpar.c
@@ -0,0 +1,18 @@
+// RUN: not %clang -### --hipstdpar -nogpulib -nogpuinc --compile %s 2>&1 | \
+// RUN: FileCheck --check-prefix=HIPSTDPAR-MISSING-LIB %s
+// RUN: %clang -### --hipstdpar --hipstdpar-path=%S/Inputs/hipstdpar \
+// RUN: --hipstdpar-thrust-path=%S/Inputs/hipstdpar/thrust \
+// RUN: --hipstdpar-prim-path=%S/Inputs/hipstdpar/rocprim \
+// RUN: -nogpulib -nogpuinc --compile %s 2>&1 | \
+// RUN: FileCheck --check-prefix=HIPSTDPAR-COMPILE %s
+// RUN: touch %t.o
+// RUN: %clang -### --hipstdpar %t.o 2>&1 | FileCheck --check-prefix=HIPSTDPAR-LINK %s
+
+// HIPSTDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--hipstdpar-path'
+// HIPSTDPAR-COMPILE: "-x" "hip"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/thrust}}"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/rocprim}}"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/Inputs/hipstdpar}}"
+// HIPSTDPAR-COMPILE: "-include" "hipstdpar_lib.hpp"
+// HIPSTDPAR-LINK: "-rpath"
+// HIPSTDPAR-LINK: "-l{{.*hip.*}}"
More information about the cfe-commits
mailing list