[clang] 9a40858 - [HIP][Clang][Driver] Add Driver support for `hipstdpar`

Alex Voicu via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 3 05:15:00 PDT 2023


Author: Alex Voicu
Date: 2023-10-03T13:14:46+01:00
New Revision: 9a408588d1b8b7899eff593c537de539a4a12651

URL: https://github.com/llvm/llvm-project/commit/9a408588d1b8b7899eff593c537de539a4a12651
DIFF: https://github.com/llvm/llvm-project/commit/9a408588d1b8b7899eff593c537de539a4a12651.diff

LOG: [HIP][Clang][Driver] Add Driver support for `hipstdpar`

This patch adds the Driver changes needed to enable HIP parallel algorithm offload on AMDGPU targets. The change can be summarised as follows:

- add two flags: `--hipstdpar`, which enables `hipstdpar` compilation, and `--hipstdpar-interpose-alloc`, which enables the optional allocation interposition mode;
- the flags correspond to new LangOpt members;
- if we are compiling or linking with `--hipstdpar`, we enable HIP; when compiling, C and C++ inputs are treated as HIP inputs;
- the ROCm / AMDGPU driver is augmented to look for and include an implementation-detail forwarding header; we error out if the user requested `hipstdpar` but the header or its dependencies (rocThrust and rocPrim) cannot be found.

Tests for the behaviour described above are also added.

Reviewed by: MaskRay, yaxunl

Differential Revision: https://reviews.llvm.org/D155775

Added: 
    clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp
    clang/test/Driver/Inputs/hipstdpar/rocprim/.keep
    clang/test/Driver/Inputs/hipstdpar/thrust/.keep
    clang/test/Driver/hipstdpar.c

Modified: 
    clang/include/clang/Basic/DiagnosticDriverKinds.td
    clang/include/clang/Basic/LangOptions.def
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/Driver.cpp
    clang/lib/Driver/ToolChains/AMDGPU.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/HIPAMD.cpp
    clang/lib/Driver/ToolChains/ROCm.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2a48c063e243ee0..91a95def4f80de4 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -70,6 +70,16 @@ def err_drv_no_rocm_device_lib : Error<
 def err_drv_no_hip_runtime : Error<
   "cannot find HIP runtime; provide its path via '--rocm-path', or pass "
   "'-nogpuinc' to build without HIP runtime">;
+def err_drv_no_hipstdpar_lib : Error<
+  "cannot find HIP Standard Parallelism Acceleration library; provide it via "
+  "'--hipstdpar-path'">;
+def err_drv_no_hipstdpar_thrust_lib : Error<
+  "cannot find rocThrust, which is required by the HIP Standard Parallelism "
+  "Acceleration library; provide it via "
+  "'--hipstdpar-thrust-path'">;
+def err_drv_no_hipstdpar_prim_lib : Error<
+  "cannot find rocPrim, which is required by the HIP Standard Parallelism "
+  "Acceleration library; provide it via '--hipstdpar-prim-path'">;
 
 def err_drv_no_hipspv_device_lib : Error<
   "cannot find HIP device library%select{| for %1}0; provide its path via "

diff  --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 28c9bcec3ee60f1..c0ea4ecb9806a5b 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -280,6 +280,8 @@ ENUM_LANGOPT(SYCLVersion  , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
 
 LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
 LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")
+LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)")
+LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)")
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ee4e23f335e7875..ff2130c93f28ea0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1258,6 +1258,32 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<hip_Group>,
   HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group<hip_Group>,
   HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
+def hipstdpar : Flag<["--"], "hipstdpar">,
+  Visibility<[ClangOption, CC1Option]>,
+  Group<CompileOnly_Group>,
+  HelpText<"Enable HIP acceleration for standard parallel algorithms">,
+  MarshallingInfoFlag<LangOpts<"HIPStdPar">>;
+def hipstdpar_interpose_alloc : Flag<["--"], "hipstdpar-interpose-alloc">,
+  Visibility<[ClangOption, CC1Option]>,
+  Group<CompileOnly_Group>,
+  HelpText<"Replace all memory allocation / deallocation calls with "
+           "hipManagedMalloc / hipFree equivalents">,
+  MarshallingInfoFlag<LangOpts<"HIPStdParInterposeAlloc">>;
+// TODO: use MarshallingInfo here
+def hipstdpar_path_EQ : Joined<["--"], "hipstdpar-path=">, Group<i_Group>,
+  HelpText<
+    "HIP Standard Parallel Algorithm Acceleration library path, used for "
+    "finding and implicitly including the library header">;
+def hipstdpar_thrust_path_EQ : Joined<["--"], "hipstdpar-thrust-path=">,
+  Group<i_Group>,
+  HelpText<
+    "rocThrust path, required by the HIP Standard Parallel Algorithm "
+    "Acceleration library, used to implicitly include the rocThrust library">;
+def hipstdpar_prim_path_EQ : Joined<["--"], "hipstdpar-prim-path=">,
+  Group<i_Group>,
+  HelpText<
+    "rocPrim path, required by the HIP Standard Parallel Algorithm "
+    "Acceleration library, used to implicitly include the rocPrim library">;
 def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group<hip_Group>,
   HelpText<"ROCm device library path. Alternative to rocm-path.">;
 def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;

diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 84b8fc7685fed42..77328e1f99e5021 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -767,7 +767,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
                    [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
                      return types::isHIP(I.first);
                    }) ||
-      C.getInputArgs().hasArg(options::OPT_hip_link);
+      C.getInputArgs().hasArg(options::OPT_hip_link) ||
+      C.getInputArgs().hasArg(options::OPT_hipstdpar);
   if (IsCuda && IsHIP) {
     Diag(clang::diag::err_drv_mix_cuda_hip);
     return;
@@ -2705,6 +2706,10 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
         }
       }
 
+      if ((Ty == types::TY_C || Ty == types::TY_CXX) &&
+          Args.hasArgNoClaim(options::OPT_hipstdpar))
+        Ty = types::TY_HIP;
+
       if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
         Inputs.push_back(std::make_pair(Ty, A));
 
@@ -3915,6 +3920,11 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
   phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
 
   if (FinalPhase == phases::Link) {
+    if (Args.hasArgNoClaim(options::OPT_hipstdpar)) {
+      Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link));
+      Args.AddFlagArg(nullptr,
+                      getOpts().getOption(options::OPT_frtlib_add_rpath));
+    }
     // Emitting LLVM while linking disabled except in HIPAMD Toolchain
     if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
       Diag(clang::diag::err_drv_emit_llvm_link);

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 3f08c0ef5d6f000..d4b33ad551c433e 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -329,6 +329,20 @@ RocmInstallationDetector::RocmInstallationDetector(
   RocmDeviceLibPathArg =
       Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
   HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
+  HIPStdParPathArg =
+    Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ);
+  HasHIPStdParLibrary =
+    !HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg +
+                                                   "/hipstdpar_lib.hpp");
+  HIPRocThrustPathArg =
+    Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ);
+  HasRocThrustLibrary = !HIPRocThrustPathArg.empty() &&
+                        D.getVFS().exists(HIPRocThrustPathArg + "/thrust");
+  HIPRocPrimPathArg =
+    Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ);
+  HasRocPrimLibrary = !HIPRocPrimPathArg.empty() &&
+                      D.getVFS().exists(HIPRocPrimPathArg + "/rocprim");
+
   if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
     HIPVersionArg = A->getValue();
     unsigned Major = ~0U;
@@ -507,6 +521,7 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
                                                  ArgStringList &CC1Args) const {
   bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
                             !DriverArgs.hasArg(options::OPT_nohipwrapperinc);
+  bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar);
 
   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
     // HIP header includes standard library wrapper headers under clang
@@ -529,8 +544,45 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
     CC1Args.push_back(DriverArgs.MakeArgString(P));
   }
 
-  if (DriverArgs.hasArg(options::OPT_nogpuinc))
+  const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() {
+    if (!hasHIPStdParLibrary()) {
+      D.Diag(diag::err_drv_no_hipstdpar_lib);
+      return;
+    }
+    if (!HasRocThrustLibrary &&
+        !D.getVFS().exists(getIncludePath() + "/thrust")) {
+      D.Diag(diag::err_drv_no_hipstdpar_thrust_lib);
+      return;
+    }
+    if (!HasRocPrimLibrary &&
+        !D.getVFS().exists(getIncludePath() + "/rocprim")) {
+      D.Diag(diag::err_drv_no_hipstdpar_prim_lib);
+      return;
+    }
+
+    const char *ThrustPath;
+    if (HasRocThrustLibrary)
+      ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg);
+    else
+      ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust");
+
+    const char *PrimPath;
+    if (HasRocPrimLibrary)
+      PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg);
+    else
+      PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim");
+
+    CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath,
+                    "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg),
+                    "-include", "hipstdpar_lib.hpp"});
+  };
+
+  if (DriverArgs.hasArg(options::OPT_nogpuinc)) {
+    if (HasHipStdPar)
+      HandleHipStdPar();
+
     return;
+  }
 
   if (!hasHIPRuntime()) {
     D.Diag(diag::err_drv_no_hip_runtime);
@@ -541,6 +593,8 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
   CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
   if (UsesRuntimeWrapper)
     CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
+  if (HasHipStdPar)
+    HandleHipStdPar();
 }
 
 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a694d00b569a590..129adfb9fcc74d1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6580,6 +6580,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-fhip-new-launch-api");
     Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init,
                       options::OPT_fno_gpu_allow_device_init);
+    Args.AddLastArg(CmdArgs, options::OPT_hipstdpar);
+    Args.AddLastArg(CmdArgs, options::OPT_hipstdpar_interpose_alloc);
     Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name,
                       options::OPT_fno_hip_kernel_arg_name);
   }

diff  --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 3fc5669c06c3994..ccb36a6c846c806 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -113,6 +113,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
                         "--no-undefined",
                         "-shared",
                         "-plugin-opt=-amdgpu-internalize-symbols"};
+  if (Args.hasArg(options::OPT_hipstdpar))
+    LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");
 
   auto &TC = getToolChain();
   auto &D = TC.getDriver();
@@ -242,6 +244,8 @@ void HIPAMDToolChain::addClangTargetOptions(
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                           false))
     CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
+  if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
+    CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
 
   StringRef MaxThreadsPerBlock =
       DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);

diff  --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h
index 554d8a6929ac592..dceb0ab03669332 100644
--- a/clang/lib/Driver/ToolChains/ROCm.h
+++ b/clang/lib/Driver/ToolChains/ROCm.h
@@ -77,6 +77,9 @@ class RocmInstallationDetector {
   const Driver &D;
   bool HasHIPRuntime = false;
   bool HasDeviceLibrary = false;
+  bool HasHIPStdParLibrary = false;
+  bool HasRocThrustLibrary = false;
+  bool HasRocPrimLibrary = false;
 
   // Default version if not detected or specified.
   const unsigned DefaultVersionMajor = 3;
@@ -96,6 +99,13 @@ class RocmInstallationDetector {
   std::vector<std::string> RocmDeviceLibPathArg;
   // HIP runtime path specified by --hip-path.
   StringRef HIPPathArg;
+  // HIP Standard Parallel Algorithm acceleration library specified by
+  // --hipstdpar-path
+  StringRef HIPStdParPathArg;
+  // rocThrust algorithm library specified by --hipstdpar-thrust-path
+  StringRef HIPRocThrustPathArg;
+  // rocPrim algorithm library specified by --hipstdpar-prim-path
+  StringRef HIPRocPrimPathArg;
   // HIP version specified by --hip-version.
   StringRef HIPVersionArg;
   // Wheter -nogpulib is specified.
@@ -180,6 +190,9 @@ class RocmInstallationDetector {
   /// Check whether we detected a valid ROCm device library.
   bool hasDeviceLibrary() const { return HasDeviceLibrary; }
 
+  /// Check whether we detected a valid HIP STDPAR Acceleration library.
+  bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; }
+
   /// Print information about the detected ROCm installation.
   void print(raw_ostream &OS) const;
 

diff  --git a/clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp b/clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp
new file mode 100644
index 000000000000000..e69de29bb2d1d64

diff  --git a/clang/test/Driver/Inputs/hipstdpar/rocprim/.keep b/clang/test/Driver/Inputs/hipstdpar/rocprim/.keep
new file mode 100644
index 000000000000000..e69de29bb2d1d64

diff  --git a/clang/test/Driver/Inputs/hipstdpar/thrust/.keep b/clang/test/Driver/Inputs/hipstdpar/thrust/.keep
new file mode 100644
index 000000000000000..e69de29bb2d1d64

diff  --git a/clang/test/Driver/hipstdpar.c b/clang/test/Driver/hipstdpar.c
new file mode 100644
index 000000000000000..b4fd815d9a76482
--- /dev/null
+++ b/clang/test/Driver/hipstdpar.c
@@ -0,0 +1,18 @@
+// RUN: not %clang -### --hipstdpar -nogpulib -nogpuinc --compile %s 2>&1 | \
+// RUN:   FileCheck --check-prefix=HIPSTDPAR-MISSING-LIB %s
+// RUN: %clang -### --hipstdpar --hipstdpar-path=%S/Inputs/hipstdpar \
+// RUN:   --hipstdpar-thrust-path=%S/Inputs/hipstdpar/thrust \
+// RUN:   --hipstdpar-prim-path=%S/Inputs/hipstdpar/rocprim \
+// RUN:   -nogpulib -nogpuinc --compile %s 2>&1 | \
+// RUN:   FileCheck --check-prefix=HIPSTDPAR-COMPILE %s
+// RUN: touch %t.o
+// RUN: %clang -### --hipstdpar %t.o 2>&1 | FileCheck --check-prefix=HIPSTDPAR-LINK %s
+
+// HIPSTDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--hipstdpar-path'
+// HIPSTDPAR-COMPILE: "-x" "hip"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/thrust}}"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/rocprim}}"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/Inputs/hipstdpar}}"
+// HIPSTDPAR-COMPILE: "-include" "hipstdpar_lib.hpp"
+// HIPSTDPAR-LINK: "-rpath"
+// HIPSTDPAR-LINK: "-l{{.*hip.*}}"


        


More information about the cfe-commits mailing list