[clang] 4593e41 - AMDGPU: Teach toolchain to link rocm device libs

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 10 10:39:17 PDT 2020


Author: Matt Arsenault
Date: 2020-04-10T13:37:32-04:00
New Revision: 4593e4131affa84e61d7b6844be409ba46d29f11

URL: https://github.com/llvm/llvm-project/commit/4593e4131affa84e61d7b6844be409ba46d29f11
DIFF: https://github.com/llvm/llvm-project/commit/4593e4131affa84e61d7b6844be409ba46d29f11.diff

LOG: AMDGPU: Teach toolchain to link rocm device libs

Currently the library is separately linked, but this isn't correct to
implement fast math flags correctly. Each module should get the
version of the library appropriate for its combination of fast math
and related flags, with the attributes propagated into its functions
and internalized.

HIP already maintains the list of libraries, but this is not used for
OpenCL. Unfortunately, HIP uses a separate --hip-device-lib argument,
despite both languages using the same bitcode library. Eventually
these two searches need to be merged.

An additional problem is there are 3 different locations the libraries
are installed, depending on which build is used. This also needs to be
consolidated (or at least the search logic needs to deal with this
unnecessary complexity).

Added: 
    clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc
    clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc
    clang/test/Driver/rocm-detect.cl
    clang/test/Driver/rocm-device-libs.cl
    clang/test/Driver/rocm-not-found.cl

Modified: 
    clang/include/clang/Basic/DiagnosticDriverKinds.td
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/Driver.cpp
    clang/lib/Driver/ToolChains/AMDGPU.cpp
    clang/lib/Driver/ToolChains/AMDGPU.h
    clang/lib/Driver/ToolChains/HIP.cpp
    clang/lib/Driver/ToolChains/HIP.h
    clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
    clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
    clang/test/Driver/amdgpu-visibility.cl
    llvm/include/llvm/Support/TargetParser.h
    llvm/lib/Support/TargetParser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index cba59cb3b66d..b28ee88f3d87 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -55,6 +55,14 @@ def err_drv_no_cuda_installation : Error<
 def err_drv_no_cuda_libdevice : Error<
   "cannot find libdevice for %0. Provide path to different CUDA installation "
   "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
+
+def err_drv_no_rocm_installation : Error<
+  "cannot find ROCm installation.  Provide its path via --rocm-path, or pass "
+  "-nogpulib.">;
+def err_drv_no_rocm_device_lib : Error<
+  "cannot find device library for %0. Provide path to different ROCm installation "
+  "via --rocm-path, or pass -nogpulib to build without linking default libraries.">;
+
 def err_drv_cuda_version_unsupported : Error<
   "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
   "but installation at %3 is %4. Use --cuda-path to specify a different CUDA "

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 02875f68ebfe..661aad49a8ee 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -608,6 +608,8 @@ def : Flag<["-"], "fno-cuda-rdc">, Alias<fno_gpu_rdc>;
 def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
 def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
+def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<i_Group>,
+  HelpText<"ROCm installation path">;
 def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,
   HelpText<"HIP device library path">;
 def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,

diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 9e1f41345ea2..7d82c8faa573 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4857,6 +4857,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
       TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
       break;
     case llvm::Triple::AMDHSA:
+      TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
+      break;
     case llvm::Triple::AMDPAL:
     case llvm::Triple::Mesa3D:
       TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 2cec0dc9de22..f09578f4769e 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -12,7 +12,8 @@
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "llvm/Option/ArgList.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
 
 using namespace clang::driver;
 using namespace clang::driver::tools;
@@ -20,6 +21,162 @@ using namespace clang::driver::toolchains;
 using namespace clang;
 using namespace llvm::opt;
 
+RocmInstallationDetector::RocmInstallationDetector(
+    const Driver &D, const llvm::Triple &HostTriple,
+    const llvm::opt::ArgList &Args)
+    : D(D) {
+  // A directory to probe; StrictChecking requires its lib dir even with -nogpulib.
+  struct Candidate {
+    std::string Path;
+    bool StrictChecking;
+
+    Candidate(std::string Path, bool StrictChecking = false)
+        : Path(Path), StrictChecking(StrictChecking) {}
+  };
+
+  SmallVector<Candidate, 4> Candidates;
+
+  if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
+    Candidates.emplace_back(
+        Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
+  } else {
+    // Try to find relative to the compiler binary. Check both a normal Unix
+    // prefix position of the clang binary, as well as the Windows-esque layout
+    // the ROCm packages use with the host architecture subdirectory of bin.
+    // Compare only the last path component, never the whole directory path.
+    StringRef BinDir = D.getInstalledDir();
+    if (llvm::sys::path::filename(BinDir) == HostTriple.getArchName())
+      BinDir = llvm::sys::path::parent_path(BinDir);
+
+    if (llvm::sys::path::filename(BinDir) == "bin") {
+      Candidates.emplace_back(llvm::sys::path::parent_path(BinDir).str(),
+                              /*StrictChecking=*/true);
+    }
+
+    Candidates.emplace_back(D.SysRoot + "/opt/rocm");
+  }
+
+  bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);
+  auto &FS = D.getVFS();
+
+  for (const auto &Candidate : Candidates) {
+    InstallPath = Candidate.Path;
+    if (InstallPath.empty() || !FS.exists(InstallPath))
+      continue;
+
+    // FIXME: The install path situation is a real mess.
+
+    // For a cmake install, these are placed directly in
+    // ${INSTALL_PREFIX}/lib
+
+    // In the separate OpenCL builds, the bitcode libraries are placed in
+    // ${OPENCL_ROOT}/lib/x86_64/bitcode/*
+
+    // For the rocm installed packages, these are placed at
+    // /opt/rocm/opencl/lib/x86_64/bitcode
+
+    // An additional copy is installed, in scattered locations between
+    // /opt/rocm/hcc/rocdl/oclc
+    // /opt/rocm/hcc/rocdl/ockl
+    // /opt/rocm/hcc/rocdl/lib
+    //
+    // Yet another complete set is installed to
+    // /opt/rocm/hcc/rocdl/lib
+
+    // For now only <InstallPath>/lib is searched. Rebuild the derived paths
+    // from scratch for every candidate: appending to the members would keep
+    // the prefix of a previously rejected candidate.
+    IncludePath = InstallPath;
+    llvm::sys::path::append(IncludePath, "include");
+    LibDevicePath = InstallPath;
+    llvm::sys::path::append(LibDevicePath, "lib");
+
+    // We don't need the include path for OpenCL, since clang already ships with
+    // the default header.
+
+    bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
+    if (CheckLibDevice && !FS.exists(LibDevicePath))
+      continue;
+
+    const StringRef Suffix(".amdgcn.bc");
+
+    // Enumerate through the driver's VFS, like the exists() checks above.
+    std::error_code EC;
+    for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC), LE;
+         !EC && LI != LE; LI.increment(EC)) {
+      StringRef FilePath = LI->path();
+      StringRef FileName = llvm::sys::path::filename(FilePath);
+      if (!FileName.endswith(Suffix))
+        continue;
+
+      StringRef BaseName = FileName.drop_back(Suffix.size());
+
+      if (BaseName == "ocml") {
+        OCML = FilePath;
+      } else if (BaseName == "ockl") {
+        OCKL = FilePath;
+      } else if (BaseName == "opencl") {
+        OpenCL = FilePath;
+      } else if (BaseName == "hip") {
+        HIP = FilePath;
+      } else if (BaseName == "oclc_finite_only_off") {
+        FiniteOnly.Off = FilePath;
+      } else if (BaseName == "oclc_finite_only_on") {
+        FiniteOnly.On = FilePath;
+      } else if (BaseName == "oclc_daz_opt_on") {
+        DenormalsAreZero.On = FilePath;
+      } else if (BaseName == "oclc_daz_opt_off") {
+        DenormalsAreZero.Off = FilePath;
+      } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
+        CorrectlyRoundedSqrt.On = FilePath;
+      } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
+        CorrectlyRoundedSqrt.Off = FilePath;
+      } else if (BaseName == "oclc_unsafe_math_on") {
+        UnsafeMath.On = FilePath;
+      } else if (BaseName == "oclc_unsafe_math_off") {
+        UnsafeMath.Off = FilePath;
+      } else if (BaseName == "oclc_wavefrontsize64_on") {
+        WavefrontSize64.On = FilePath;
+      } else if (BaseName == "oclc_wavefrontsize64_off") {
+        WavefrontSize64.Off = FilePath;
+      } else {
+        // Process all bitcode filenames that look like
+        // oclc_isa_version_XXX.amdgcn.bc
+        const StringRef DeviceLibPrefix = "oclc_isa_version_";
+        if (!BaseName.startswith(DeviceLibPrefix))
+          continue;
+
+        StringRef IsaVersionNumber =
+            BaseName.drop_front(DeviceLibPrefix.size());
+
+        // Build the key in owned storage instead of a named Twine.
+        SmallString<8> GfxName("gfx");
+        GfxName += IsaVersionNumber;
+        LibDeviceMap.insert(std::make_pair(GfxName.str(), FilePath.str()));
+      }
+    }
+
+    if (!NoBuiltinLibs) {
+      // Check that the required non-target libraries are all available.
+      if (!allGenericLibsValid())
+        continue;
+
+      // Check that we have found at least one libdevice that we can link in if
+      // -nogpulib hasn't been specified.
+      if (LibDeviceMap.empty())
+        continue;
+    }
+
+    IsValid = true;
+    break;
+  }
+}
+
+void RocmInstallationDetector::print(raw_ostream &OS) const {
+  if (isValid())
+    OS << "Found ROCm installation: " << InstallPath << '\n';
+}
+
 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
@@ -142,6 +299,12 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
                llvm::DenormalMode::getIEEE();
 }
 
+/// ROCM Toolchain: AMDGPUToolChain plus ROCm installation detection.
+ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                             const ArgList &Args)
+  : AMDGPUToolChain(D, Triple, Args),
+    RocmInstallation(D, Triple, Args) { }
+
 void AMDGPUToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,
@@ -155,3 +318,89 @@ void AMDGPUToolChain::addClangTargetOptions(
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
 }
+
+void ROCMToolChain::addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
+                                         DeviceOffloadingKind);
+
+  // The rest links the ROCm device bitcode libraries; -nogpulib opts out.
+  if (DriverArgs.hasArg(options::OPT_nogpulib))
+    return;
+
+  if (!RocmInstallation.isValid()) {
+    getDriver().Diag(diag::err_drv_no_rocm_installation);
+    return;
+  }
+
+  // Get the device name and canonicalize it
+  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+  const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+  std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+  if (LibDeviceFile.empty()) {
+    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch;
+    return;
+  }
+
+  // Computed per call: a static here would cache the first target's answer.
+  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+  const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
+  bool Wave64 = !HasWave32 || DriverArgs.hasFlag(
+    options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+             getDefaultDenormsAreZeroForTarget(Kind);
+  bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
+  bool UnsafeMathOpt =
+      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
+  bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
+  bool CorrectSqrt =
+      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
+
+  // Add the OpenCL specific bitcode library.
+  CC1Args.push_back("-mlink-builtin-bitcode");
+  CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
+
+  // Add the generic set of libraries.
+  RocmInstallation.addCommonBitcodeLibCC1Args(
+      DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
+      UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
+}
+
+void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
+    bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
+  // Appends one "-mlink-builtin-bitcode <file>" pair to CC1Args per library:
+  // the two always-linked libraries, the On/Off variant selected by each
+  // flag, and finally the ISA-version library passed in as LibDeviceFile.
+  static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
+
+  // FastRelaxedMath selects the "on" variant of both the unsafe-math and the
+  // finite-only library.
+  const bool UseUnsafeMath = UnsafeMathOpt || FastRelaxedMath;
+  const bool UseFiniteOnly = FiniteOnly || FastRelaxedMath;
+
+  // Listed in the order the arguments are emitted; the initializers are
+  // evaluated top to bottom.
+  const StringRef BitcodeLibs[] = {
+      getOCMLPath(),
+      getOCKLPath(),
+      getDenormalsAreZeroPath(DAZ),
+      getUnsafeMathPath(UseUnsafeMath),
+      getFiniteOnlyPath(UseFiniteOnly),
+      getCorrectlyRoundedSqrtPath(CorrectSqrt),
+      getWavefrontSize64Path(Wave64),
+      LibDeviceFile,
+  };
+
+  // Every library gets its own flag.
+  for (StringRef Lib : BitcodeLibs) {
+    CC1Args.push_back(LinkBitcodeFlag);
+    CC1Args.push_back(DriverArgs.MakeArgString(Lib));
+  }
+}

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index e7a873efb008..87a16272d624 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -13,12 +13,154 @@
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/TargetParser.h"
 
 #include <map>
 
 namespace clang {
 namespace driver {
+
+/// Locates a ROCm installation and the device bitcode libraries inside it.
+/// TODO: Generalize to handle libclc.
+class RocmInstallationDetector {
+private:
+  struct ConditionalLibrary {
+    SmallString<0> On;
+    SmallString<0> Off;
+
+    bool isValid() const {
+      return !On.empty() && !Off.empty();
+    }
+
+    StringRef get(bool Enabled) const {
+      assert(isValid());
+      return Enabled ? On : Off;
+    }
+  };
+
+  const Driver &D;
+  bool IsValid = false;
+  //RocmVersion Version = RocmVersion::UNKNOWN;
+  SmallString<0> InstallPath;
+  //SmallString<0> BinPath;
+  SmallString<0> LibPath;
+  SmallString<0> LibDevicePath;
+  SmallString<0> IncludePath;
+  llvm::StringMap<std::string> LibDeviceMap;
+
+  // Libraries that are always linked.
+  SmallString<0> OCML;
+  SmallString<0> OCKL;
+
+  // Libraries that are linked depending on the source language.
+  SmallString<0> OpenCL;
+  SmallString<0> HIP;
+
+  // Libraries swapped based on compile flags.
+  ConditionalLibrary WavefrontSize64;
+  ConditionalLibrary FiniteOnly;
+  ConditionalLibrary UnsafeMath;
+  ConditionalLibrary DenormalsAreZero;
+  ConditionalLibrary CorrectlyRoundedSqrt;
+
+  bool allGenericLibsValid() const {
+    return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() && !HIP.empty() &&
+           WavefrontSize64.isValid() && FiniteOnly.isValid() &&
+           UnsafeMath.isValid() && DenormalsAreZero.isValid() &&
+           CorrectlyRoundedSqrt.isValid();
+  }
+
+  // Architectures (typed as CudaArch) for which we have raised an error in
+  // CheckRocmVersionSupportsArch.
+  mutable llvm::SmallSet<CudaArch, 4> ArchsWithBadVersion;
+
+public:
+  RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
+                           const llvm::opt::ArgList &Args);
+
+  /// Add arguments needed to link default bitcode libraries.
+  void addCommonBitcodeLibCC1Args(const llvm::opt::ArgList &DriverArgs,
+                                  llvm::opt::ArgStringList &CC1Args,
+                                  StringRef LibDeviceFile, bool Wave64,
+                                  bool DAZ, bool FiniteOnly, bool UnsafeMathOpt,
+                                  bool FastRelaxedMath, bool CorrectSqrt) const;
+
+  /// Emit an error if Version does not support the given Arch.
+  ///
+  /// If either Version or Arch is unknown, does not emit an error.  Emits at
+  /// most one error per Arch.
+  void CheckRocmVersionSupportsArch(CudaArch Arch) const;
+
+  /// Check whether we detected a valid Rocm install.
+  bool isValid() const { return IsValid; }
+  /// Print information about the detected ROCm installation.
+  void print(raw_ostream &OS) const;
+
+  /// Get the detected Rocm install's version.
+  // RocmVersion version() const { return Version; }
+
+  /// Get the detected Rocm installation path.
+  StringRef getInstallPath() const { return InstallPath; }
+
+  /// Get the detected path to Rocm's bin directory.
+  // StringRef getBinPath() const { return BinPath; }
+
+  /// Get the detected Rocm Include path.
+  StringRef getIncludePath() const { return IncludePath; }
+
+  /// Get the detected Rocm library path.
+  StringRef getLibPath() const { return LibPath; }
+
+  /// Get the detected Rocm device library path.
+  StringRef getLibDevicePath() const { return LibDevicePath; }
+
+  StringRef getOCMLPath() const {
+    assert(!OCML.empty());
+    return OCML;
+  }
+
+  StringRef getOCKLPath() const {
+    assert(!OCKL.empty());
+    return OCKL;
+  }
+
+  StringRef getOpenCLPath() const {
+    assert(!OpenCL.empty());
+    return OpenCL;
+  }
+
+  StringRef getHIPPath() const {
+    assert(!HIP.empty());
+    return HIP;
+  }
+
+  StringRef getWavefrontSize64Path(bool Enabled) const {
+    return WavefrontSize64.get(Enabled);
+  }
+
+  StringRef getFiniteOnlyPath(bool Enabled) const {
+    return FiniteOnly.get(Enabled);
+  }
+
+  StringRef getUnsafeMathPath(bool Enabled) const {
+    return UnsafeMath.get(Enabled);
+  }
+
+  StringRef getDenormalsAreZeroPath(bool Enabled) const {
+    return DenormalsAreZero.get(Enabled);
+  }
+
+  StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const {
+    return CorrectlyRoundedSqrt.get(Enabled);
+  }
+
+  /// Get libdevice file for given architecture (empty if none was found)
+  std::string getLibDeviceFile(StringRef Gpu) const {
+    return LibDeviceMap.lookup(Gpu);
+  }
+};
+
 namespace tools {
 namespace amdgpu {
 
@@ -42,11 +184,9 @@ void getAMDGPUTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
 namespace toolchains {
 
 class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
-
-private:
+protected:
   const std::map<options::ID, const StringRef> OptionsDefault;
 
-protected:
   Tool *buildLinker() const override;
   const StringRef getOptionDefault(options::ID OptID) const {
     auto opt = OptionsDefault.find(OptID);
@@ -79,6 +219,19 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
       const llvm::fltSemantics *FPType = nullptr) const override;
 };
 
+class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
+private:
+  RocmInstallationDetector RocmInstallation;
+
+public:
+  ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                const llvm::opt::ArgList &Args);
+  void
+  addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                        llvm::opt::ArgStringList &CC1Args,
+                        Action::OffloadKind DeviceOffloadKind) const override;
+};
+
 } // end namespace toolchains
 } // end namespace driver
 } // end namespace clang

diff  --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 8676f8b30fe6..3c56229e6bc4 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -269,7 +269,7 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
 
 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
                              const ToolChain &HostTC, const ArgList &Args)
-    : AMDGPUToolChain(D, Triple, Args), HostTC(HostTC) {
+    : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
   // Lookup binaries into the driver directory, this is used to
   // discover the clang-offload-bundler executable.
   getProgramPaths().push_back(getDriver().Dir);

diff  --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h
index b6a3a2718635..01a0ee916bc0 100644
--- a/clang/lib/Driver/ToolChains/HIP.h
+++ b/clang/lib/Driver/ToolChains/HIP.h
@@ -73,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
 
 namespace toolchains {
 
-class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public AMDGPUToolChain {
+class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain {
 public:
   HIPToolChain(const Driver &D, const llvm::Triple &Triple,
                 const ToolChain &HostTC, const llvm::opt::ArgList &Args);

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl b/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
index 952b25dec790..f09981dfa0f3 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
@@ -1,5 +1,5 @@
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
 
 // CHECK-DAG: ![[DWARF_ADDRESS_SPACE_NONE:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}})
 // CHECK-DAG: ![[DWARF_ADDRESS_SPACE_LOCAL:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}, dwarfAddressSpace: 2)

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl b/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
index 894611ea88e8..4a4c8cc54eb3 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -1,5 +1,5 @@
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
 
 // CHECK-DAG: ![[FILEVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
 // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR0]], expr: !DIExpression())

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc b/clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/amdgpu-visibility.cl b/clang/test/Driver/amdgpu-visibility.cl
index 19756d4744e0..fb47c5a3de67 100644
--- a/clang/test/Driver/amdgpu-visibility.cl
+++ b/clang/test/Driver/amdgpu-visibility.cl
@@ -2,6 +2,10 @@
 // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility=protected  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED  %s
 // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility-ms-compat  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS  %s
 
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility=protected  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED  %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility-ms-compat  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS  %s
+
 // DEFAULT-DAG: "-fvisibility" "hidden"
 // DEFAULT-DAG: "-fapply-global-visibility-to-externs"
 

diff  --git a/clang/test/Driver/rocm-detect.cl b/clang/test/Driver/rocm-detect.cl
new file mode 100644
index 000000000000..b143098c9074
--- /dev/null
+++ b/clang/test/Driver/rocm-detect.cl
@@ -0,0 +1,21 @@
+// REQUIRES: clang-driver
+// REQUIRES: amdgpu-registered-target
+
+// Make sure the appropriate device specific library is available.
+
+// We don't include every target in the test directory, so just pick a valid
+// target not included in the test.
+
+// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
+
+
+// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 -nogpulib \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s
+
+
+// GFX902-DEFAULTLIBS: error: cannot find device library for gfx902. Provide path to different ROCm installation via --rocm-path, or pass -nogpulib to build without linking default libraries.
+
+// NODEFAULTLIBS-NOT: error: cannot find

diff  --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl
new file mode 100644
index 000000000000..83641d24d156
--- /dev/null
+++ b/clang/test/Driver/rocm-device-libs.cl
@@ -0,0 +1,163 @@
+// REQUIRES: clang-driver
+// REQUIRES: amdgpu-registered-target
+
+// Test flush-denormals-to-zero enabled uses oclc_daz_opt_on
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx900 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
+
+
+
+// Make sure the different denormal default is respected for gfx8
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
+
+
+
+// Make sure the non-canonical name works
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=fiji \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx900 \
+// RUN:   -cl-denorms-are-zero \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   -cl-denorms-are-zero \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   -cl-finite-math-only \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx803                     \
+// RUN:   -cl-fp32-correctly-rounded-divide-sqrt \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx803                     \
+// RUN:   -cl-fast-relaxed-math \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx803                     \
+// RUN:   -cl-unsafe-math-optimizations \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx1010                    \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx1011                    \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1011,WAVE32 %s
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx1012                    \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1012,WAVE32 %s
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx1010 -mwavefrontsize64  \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE64 %s
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64  \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s
+
+// Ignore -mno-wavefrontsize64 without wave32 support
+// RUN: %clang -### -target amdgcn-amd-amdhsa       \
+// RUN:   -x cl -mcpu=gfx803  -mno-wavefrontsize64  \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs    \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX803,WAVE64 %s
+
+
+
+// COMMON: "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/opencl.amdgcn.bc"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ocml.amdgcn.bc"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ockl.amdgcn.bc"
+
+// GFX900-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_off.amdgcn.bc"
+// GFX803-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+// GFX700-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+// COMMON-DAZ-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+
+
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc"
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc"
+
+
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc"
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc"
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc"
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+// WAVE64: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_wavefrontsize64_on.amdgcn.bc"
+// WAVE32: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_wavefrontsize64_off.amdgcn.bc"
+
+
+// GFX900: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_900.amdgcn.bc"
+// GFX803: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_803.amdgcn.bc"

diff  --git a/clang/test/Driver/rocm-not-found.cl b/clang/test/Driver/rocm-not-found.cl
new file mode 100644
index 000000000000..49b6c7efcf99
--- /dev/null
+++ b/clang/test/Driver/rocm-not-found.cl
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+
+// Check that we raise an error if we're trying to compile OpenCL for amdhsa code but can't
+// find a ROCm install, unless -nogpulib was passed.
+
+// RUN: %clang -### --sysroot=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
+// RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
+// ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib.
+
+// RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck %s --check-prefix OK
+// OK-NOT: cannot find ROCm installation.

diff  --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index a7e1a752d081..ef7c86122dcd 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -151,7 +151,10 @@ enum ArchFeatureKind : uint32_t {
 
   // Common features.
   FEATURE_FAST_FMA_F32 = 1 << 4,
-  FEATURE_FAST_DENORMAL_F32 = 1 << 5
+  FEATURE_FAST_DENORMAL_F32 = 1 << 5,
+
+  // Wavefront 32 is available.
+  FEATURE_WAVE32 = 1 << 6
 };
 
 StringRef getArchNameAMDGCN(GPUKind AK);

diff  --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index 84ead58b98cd..14a5d19d5273 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -99,9 +99,9 @@ constexpr GPUInfo AMDGCNGPUs[37] = {
   {{"gfx906"},    {"gfx906"},  GK_GFX906,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
   {{"gfx908"},    {"gfx908"},  GK_GFX908,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
   {{"gfx909"},    {"gfx909"},  GK_GFX909,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
-  {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
-  {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
-  {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+  {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+  {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
 };
 
 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {


        


More information about the cfe-commits mailing list