[clang] 94e2b02 - Revert "[Clang][OpenMP] Add partial support for Static Device Libraries"

Thu Oct 7 07:13:59 PDT 2021

Author: Saiyedul Islam
Date: 2021-10-07T14:13:24Z
New Revision: 94e2b0258a176c7451dd8291cdf060ea048fee44

URL: https://github.com/llvm/llvm-project/commit/94e2b0258a176c7451dd8291cdf060ea048fee44
DIFF: https://github.com/llvm/llvm-project/commit/94e2b0258a176c7451dd8291cdf060ea048fee44.diff

LOG: Revert "[Clang][OpenMP] Add partial support for Static Device Libraries"

This reverts commit 4c4117089599cb5b6c6fa5635c28462ffd1bddf4.

Added: 
    

Modified: 
    clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/CommonArgs.cpp
    clang/lib/Driver/ToolChains/CommonArgs.h
    clang/lib/Driver/ToolChains/Cuda.cpp
    clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp

Removed: 
    clang/test/Driver/Inputs/openmp_static_device_link/libFatArchive.a
    clang/test/Driver/fat_archive_amdgpu.cpp
    clang/test/Driver/fat_archive_nvptx.cpp


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 5400e26177291..135e3694434db 100644

--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -114,10 +114,6 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
     }
   }
 
-  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
-                      SubArchName,
-                      /* bitcode SDL?*/ true,
-                      /* PostClang Link? */ false);
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   const char *OutputFileName =

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 65dfe0ae0221d..369c12aea5231 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7734,28 +7734,12 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
     Triples += Action::GetOffloadKindName(CurKind);
     Triples += '-';
     Triples += CurTC->getTriple().normalize();
-    if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_Cuda) &&
+    if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP ||
+         CurKind == Action::OFK_Cuda) &&
         CurDep->getOffloadingArch()) {
       Triples += '-';
       Triples += CurDep->getOffloadingArch();
     }
-
-    // TODO: Replace parsing of -march flag. Can be done by storing GPUArch
-    //       with each toolchain.
-    StringRef GPUArchName;
-    if (CurKind == Action::OFK_OpenMP) {
-      // Extract GPUArch from -march argument in TC argument list.
-      for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
-        auto ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
-        auto Arch = ArchStr.startswith_insensitive("-march=");
-        if (Arch) {
-          GPUArchName = ArchStr.substr(7);
-          Triples += "-";
-          break;
-        }
-      }
-      Triples += GPUArchName.str();
-    }
   }
   CmdArgs.push_back(TCArgs.MakeArgString(Triples));
 
@@ -7829,27 +7813,12 @@ void OffloadBundler::ConstructJobMultipleOutputs(
     Triples += '-';
     Triples += Dep.DependentToolChain->getTriple().normalize();
     if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
+         Dep.DependentOffloadKind == Action::OFK_OpenMP ||
          Dep.DependentOffloadKind == Action::OFK_Cuda) &&
         !Dep.DependentBoundArch.empty()) {
       Triples += '-';
       Triples += Dep.DependentBoundArch;
     }
-    // TODO: Replace parsing of -march flag. Can be done by storing GPUArch
-    //       with each toolchain.
-    StringRef GPUArchName;
-    if (Dep.DependentOffloadKind == Action::OFK_OpenMP) {
-      // Extract GPUArch from -march argument in TC argument list.
-      for (uint ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
-        StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
-        auto Arch = ArchStr.startswith_insensitive("-march=");
-        if (Arch) {
-          GPUArchName = ArchStr.substr(7);
-          Triples += "-";
-          break;
-        }
-      }
-      Triples += GPUArchName.str();
-    }
   }
 
   CmdArgs.push_back(TCArgs.MakeArgString(Triples));

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index c3abdf446cfaf..9f1895466c98d 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -34,7 +34,6 @@
 #include "clang/Driver/Util.h"
 #include "clang/Driver/XRayArgs.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -1588,292 +1587,6 @@ void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
   }
 }
 
-/// SDLSearch: Search for Static Device Library
-/// The search for SDL bitcode files is consistent with how static host
-/// libraries are discovered. That is, the -l option triggers a search for
-/// files in a set of directories called the LINKPATH. The host library search
-/// procedure looks for a specific filename in the LINKPATH.  The filename for
-/// a host library is lib<libname>.a or lib<libname>.so. For SDLs, there is an
-/// ordered-set of filenames that are searched. We call this ordered-set of
-/// filenames as SEARCH-ORDER. Since an SDL can either be device-type specific,
-/// architecture specific, or generic across all architectures, a naming
-/// convention and search order is used where the file name embeds the
-/// architecture name <arch-name> (nvptx or amdgcn) and the GPU device type
-/// <device-name> such as sm_30 and gfx906. <device-name> is absent in case of
-/// device-independent SDLs. To reduce congestion in host library directories,
-/// the search first looks for files in the “libdevice” subdirectory. SDLs that
-/// are bc files begin with the prefix “lib”.
-///
-/// Machine-code SDLs can also be managed as an archive (*.a file). The
-/// convention has been to use the prefix “lib”. To avoid confusion with host
-/// archive libraries, we use prefix "libbc-" for the bitcode SDL archives.
-///
-bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
-                      llvm::opt::ArgStringList &CC1Args,
-                      SmallVector<std::string, 8> LibraryPaths, std::string Lib,
-                      StringRef Arch, StringRef Target, bool isBitCodeSDL,
-                      bool postClangLink) {
-  SmallVector<std::string, 12> SDLs;
-
-  std::string LibDeviceLoc = "/libdevice";
-  std::string LibBcPrefix = "/libbc-";
-  std::string LibPrefix = "/lib";
-
-  if (isBitCodeSDL) {
-    // SEARCH-ORDER for Bitcode SDLs:
-    //       libdevice/libbc-<libname>-<arch-name>-<device-type>.a
-    //       libbc-<libname>-<arch-name>-<device-type>.a
-    //       libdevice/libbc-<libname>-<arch-name>.a
-    //       libbc-<libname>-<arch-name>.a
-    //       libdevice/libbc-<libname>.a
-    //       libbc-<libname>.a
-    //       libdevice/lib<libname>-<arch-name>-<device-type>.bc
-    //       lib<libname>-<arch-name>-<device-type>.bc
-    //       libdevice/lib<libname>-<arch-name>.bc
-    //       lib<libname>-<arch-name>.bc
-    //       libdevice/lib<libname>.bc
-    //       lib<libname>.bc
-
-    for (StringRef Base : {LibBcPrefix, LibPrefix}) {
-      const auto *Ext = Base.contains(LibBcPrefix) ? ".a" : ".bc";
-
-      for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
-                          Twine(Lib + "-" + Arch).str(), Twine(Lib).str()}) {
-        SDLs.push_back(Twine(LibDeviceLoc + Base + Suffix + Ext).str());
-        SDLs.push_back(Twine(Base + Suffix + Ext).str());
-      }
-    }
-  } else {
-    // SEARCH-ORDER for Machine-code SDLs:
-    //    libdevice/lib<libname>-<arch-name>-<device-type>.a
-    //    lib<libname>-<arch-name>-<device-type>.a
-    //    libdevice/lib<libname>-<arch-name>.a
-    //    lib<libname>-<arch-name>.a
-
-    const auto *Ext = ".a";
-
-    for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
-                        Twine(Lib + "-" + Arch).str()}) {
-      SDLs.push_back(Twine(LibDeviceLoc + LibPrefix + Suffix + Ext).str());
-      SDLs.push_back(Twine(LibPrefix + Suffix + Ext).str());
-    }
-  }
-
-  // The CUDA toolchain does not use a global device llvm-link before the LLVM
-  // backend generates ptx. So currently, the use of bitcode SDL for nvptx is
-  // only possible with post-clang-cc1 linking. Clang cc1 has a feature that
-  // will link libraries after clang compilation while the LLVM IR is still in
-  // memory. This utilizes a clang cc1 option called “-mlink-builtin-bitcode”.
-  // This is a clang -cc1 option that is generated by the clang driver. The
-  // option value must a full path to an existing file.
-  bool FoundSDL = false;
-  for (auto LPath : LibraryPaths) {
-    for (auto SDL : SDLs) {
-      auto FullName = Twine(LPath + SDL).str();
-      if (llvm::sys::fs::exists(FullName)) {
-        if (postClangLink)
-          CC1Args.push_back("-mlink-builtin-bitcode");
-        CC1Args.push_back(DriverArgs.MakeArgString(FullName));
-        FoundSDL = true;
-        break;
-      }
-    }
-    if (FoundSDL)
-      break;
-  }
-  return FoundSDL;
-}
-
-/// Search if a user provided archive file lib<libname>.a exists in any of
-/// the library paths. If so, add a new command to clang-offload-bundler to
-/// unbundle this archive and create a temporary device specific archive. Name
-/// of this SDL is passed to the llvm-link (for amdgcn) or to the
-/// clang-nvlink-wrapper (for nvptx) commands by the driver.
-bool tools::GetSDLFromOffloadArchive(
-    Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
-    const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
-    llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
-    StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
-    bool postClangLink) {
-
-  // We don't support bitcode archive bundles for nvptx
-  if (isBitCodeSDL && Arch.contains("nvptx"))
-    return false;
-
-  bool FoundAOB = false;
-  SmallVector<std::string, 2> AOBFileNames;
-  std::string ArchiveOfBundles;
-  for (auto LPath : LibraryPaths) {
-    ArchiveOfBundles.clear();
-
-    AOBFileNames.push_back(Twine(LPath + "/libdevice/lib" + Lib + ".a").str());
-    AOBFileNames.push_back(Twine(LPath + "/lib" + Lib + ".a").str());
-
-    for (auto AOB : AOBFileNames) {
-      if (llvm::sys::fs::exists(AOB)) {
-        ArchiveOfBundles = AOB;
-        FoundAOB = true;
-        break;
-      }
-    }
-
-    if (!FoundAOB)
-      continue;
-
-    StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
-    std::string OutputLib = D.GetTemporaryPath(
-        Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");
-
-    C.addTempFile(C.getArgs().MakeArgString(OutputLib.c_str()));
-
-    ArgStringList CmdArgs;
-    SmallString<128> DeviceTriple;
-    DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
-    DeviceTriple += '-';
-    std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
-    DeviceTriple += NormalizedTriple;
-    if (!Target.empty()) {
-      DeviceTriple += '-';
-      DeviceTriple += Target;
-    }
-
-    std::string UnbundleArg("-unbundle");
-    std::string TypeArg("-type=a");
-    std::string InputArg("-inputs=" + ArchiveOfBundles);
-    std::string OffloadArg("-targets=" + std::string(DeviceTriple));
-    std::string OutputArg("-outputs=" + OutputLib);
-
-    const char *UBProgram = DriverArgs.MakeArgString(
-        T.getToolChain().GetProgramPath("clang-offload-bundler"));
-
-    ArgStringList UBArgs;
-    UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg.c_str()));
-    UBArgs.push_back(C.getArgs().MakeArgString(TypeArg.c_str()));
-    UBArgs.push_back(C.getArgs().MakeArgString(InputArg.c_str()));
-    UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg.c_str()));
-    UBArgs.push_back(C.getArgs().MakeArgString(OutputArg.c_str()));
-
-    // Add this flag to not exit from clang-offload-bundler if no compatible
-    // code object is found in heterogenous archive library.
-    std::string AdditionalArgs("-allow-missing-bundles");
-    UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs.c_str()));
-
-    C.addCommand(std::make_unique<Command>(
-        JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
-        InputInfo(&JA, C.getArgs().MakeArgString(OutputLib.c_str()))));
-    if (postClangLink)
-      CC1Args.push_back("-mlink-builtin-bitcode");
-
-    CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
-    break;
-  }
-
-  return FoundAOB;
-}
-
-// Wrapper function used by driver for adding SDLs during link phase.
-void tools::AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
-                                const JobAction &JA,
-                                const InputInfoList &Inputs,
-                                const llvm::opt::ArgList &DriverArgs,
-                                llvm::opt::ArgStringList &CC1Args,
-                                StringRef Arch, StringRef Target,
-                                bool isBitCodeSDL, bool postClangLink) {
-  AddStaticDeviceLibs(&C, &T, &JA, &Inputs, C.getDriver(), DriverArgs, CC1Args,
-                      Arch, Target, isBitCodeSDL, postClangLink);
-}
-
-// Wrapper function used for post clang linking of bitcode SDLS for nvptx by
-// the CUDA toolchain.
-void tools::AddStaticDeviceLibsPostLinking(const Driver &D,
-                                const llvm::opt::ArgList &DriverArgs,
-                                llvm::opt::ArgStringList &CC1Args,
-                                StringRef Arch, StringRef Target,
-                                bool isBitCodeSDL, bool postClangLink) {
-  AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
-                      CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
-}
-
-// User defined Static Device Libraries(SDLs) can be passed to clang for
-// offloading GPU compilers. Like static host libraries, the use of a SDL is
-// specified with the -l command line option. The primary 
diff erence between
-// host and SDLs is the filenames for SDLs (refer SEARCH-ORDER for Bitcode SDLs
-// and SEARCH-ORDER for Machine-code SDLs for the naming convention).
-// SDLs are of following types:
-//
-// * Bitcode SDLs: They can either be a *.bc file or an archive of *.bc files.
-//           For NVPTX, these libraries are post-clang linked following each
-//           compilation. For AMDGPU, these libraries are linked one time
-//           during the application link phase.
-//
-// * Machine-code SDLs: They are archive files. For NVPTX, the archive members
-//           contain cubin for Nvidia GPUs and are linked one time during the
-//           link phase by the CUDA SDK linker called nvlink.	For AMDGPU, the
-//           process for machine code SDLs is still in development. But they
-//           will be linked by the LLVM tool lld.
-//
-// * Bundled objects that contain both host and device codes: Bundled objects
-//           may also contain library code compiled from source. For NVPTX, the
-//           bundle contains cubin. For AMDGPU, the bundle contains bitcode.
-//
-// For Bitcode and Machine-code SDLs, current compiler toolchains hardcode the
-// inclusion of specific SDLs such as math libraries and the OpenMP device
-// library libomptarget.
-void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
-                                const JobAction *JA,
-                                const InputInfoList *Inputs, const Driver &D,
-                                const llvm::opt::ArgList &DriverArgs,
-                                llvm::opt::ArgStringList &CC1Args,
-                                StringRef Arch, StringRef Target,
-                                bool isBitCodeSDL, bool postClangLink) {
-
-  SmallVector<std::string, 8> LibraryPaths;
-  // Add search directories from LIBRARY_PATH env variable
-  llvm::Optional<std::string> LibPath =
-      llvm::sys::Process::GetEnv("LIBRARY_PATH");
-  if (LibPath) {
-    SmallVector<StringRef, 8> Frags;
-    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-    llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-    for (StringRef Path : Frags)
-      LibraryPaths.emplace_back(Path.trim());
-  }
-
-  // Add directories from user-specified -L options
-  for (std::string Search_Dir : DriverArgs.getAllArgValues(options::OPT_L))
-    LibraryPaths.emplace_back(Search_Dir);
-
-  // Add path to lib-debug folders
-  SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
-  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
-  LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
-  // Build list of Static Device Libraries SDLs specified by -l option
-  llvm::SmallSet<std::string, 16> SDLNames;
-  static const StringRef HostOnlyArchives[] = {
-      "omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
-  for (auto SDLName : DriverArgs.getAllArgValues(options::OPT_l)) {
-    if (!HostOnlyArchives->contains(SDLName)) {
-      SDLNames.insert(SDLName);
-    }
-  }
-
-  // The search stops as soon as an SDL file is found. The driver then provides
-  // the full filename of the SDL to the llvm-link or clang-nvlink-wrapper
-  // command. If no SDL is found after searching each LINKPATH with
-  // SEARCH-ORDER, it is possible that an archive file lib<libname>.a exists
-  // and may contain bundled object files.
-  for (auto SDLName : SDLNames) {
-    // This is the only call to SDLSearch
-    if (!SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
-                   isBitCodeSDL, postClangLink)) {
-      GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
-                               LibraryPaths, SDLName, Arch, Target,
-                               isBitCodeSDL, postClangLink);
-    }
-  }
-}
-
 static llvm::opt::Arg *
 getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
   // The last of -mcode-object-v3, -mno-code-object-v3 and

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index 00291a3681c80..8e48f3e7a5c32 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -49,39 +49,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
                     llvm::opt::ArgStringList &CmdArgs,
                     const llvm::opt::ArgList &Args);
 
-void AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
-                                const JobAction &JA,
-                                const InputInfoList &Inputs,
-                                const llvm::opt::ArgList &DriverArgs,
-                                llvm::opt::ArgStringList &CmdArgs,
-                                StringRef Arch, StringRef Target,
-                                bool isBitCodeSDL, bool postClangLink);
-void AddStaticDeviceLibsPostLinking(const Driver &D,
-                                    const llvm::opt::ArgList &DriverArgs,
-                                    llvm::opt::ArgStringList &CmdArgs,
-                                    StringRef Arch, StringRef Target,
-                                    bool isBitCodeSDL, bool postClangLink);
-void AddStaticDeviceLibs(Compilation *C, const Tool *T, const JobAction *JA,
-                         const InputInfoList *Inputs, const Driver &D,
-                         const llvm::opt::ArgList &DriverArgs,
-                         llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
-                         StringRef Target, bool isBitCodeSDL,
-                         bool postClangLink);
-
-bool SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
-               llvm::opt::ArgStringList &CmdArgs,
-               SmallVector<std::string, 8> LibraryPaths, std::string Lib,
-               StringRef Arch, StringRef Target, bool isBitCodeSDL,
-               bool postClangLink);
-
-bool GetSDLFromOffloadArchive(Compilation &C, const Driver &D, const Tool &T,
-                              const JobAction &JA, const InputInfoList &Inputs,
-                              const llvm::opt::ArgList &DriverArgs,
-                              llvm::opt::ArgStringList &CC1Args,
-                              SmallVector<std::string, 8> LibraryPaths,
-                              StringRef Lib, StringRef Arch, StringRef Target,
-                              bool isBitCodeSDL, bool postClangLink);
-
 const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
                            const InputInfo &Input, const InputInfo &Output);
 

diff  --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 18351dae39f7e..e4a6fb8d7f2ba 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -610,11 +610,8 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(CubinF);
   }
 
-  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
-                      false, false);
-
   const char *Exec =
-      Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
+      Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
   C.addCommand(std::make_unique<Command>(
       JA, *this,
       ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
@@ -744,8 +741,6 @@ void CudaToolChain::addClangTargetOptions(
 
     addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                        getTriple());
-    AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
-                        /* bitcode SDL?*/ true, /* PostClang Link? */ true);
   }
 }
 

diff  --git a/clang/test/Driver/Inputs/openmp_static_device_link/libFatArchive.a b/clang/test/Driver/Inputs/openmp_static_device_link/libFatArchive.a
deleted file mode 100644
index ebd7e55898b3a..0000000000000
Binary files a/clang/test/Driver/Inputs/openmp_static_device_link/libFatArchive.a and /dev/null 
diff er

diff  --git a/clang/test/Driver/fat_archive_amdgpu.cpp b/clang/test/Driver/fat_archive_amdgpu.cpp
deleted file mode 100644
index b64ba8b97478c..0000000000000
--- a/clang/test/Driver/fat_archive_amdgpu.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// REQUIRES: clang-driver
-// REQUIRES: x86-registered-target
-// REQUIRES: amdgpu-registered-target
-
-// See the steps to create a fat archive are given at the end of the file.
-
-// Given a FatArchive, clang-offload-bundler should be called to create a
-// device specific archive, which should be passed to llvm-link.
-// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "[[GPU:gfx[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.bc]]" "-x" "c++"{{.*}}.cpp
-// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-amdgcn-amd-amdhsa-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
-// CHECK: llvm-link{{.*}}"[[HOSTBC]]" "[[DEVICESPECIFICARCHIVE]]" "-o" "{{.*}}-[[GPU]]-linked-{{.*}}.bc"
-// CHECK: ld"{{.*}}" "-L{{.*}}/Inputs/openmp_static_device_link" "{{.*}} "-lFatArchive" "{{.*}}" "-lomp{{.*}}-lomptarget"
-// expected-no-diagnostics
-
-#ifndef HEADER
-#define HEADER
-
-#define N 10
-
-#pragma omp declare target
-// Functions defined in Fat Archive.
-extern "C" void func_present(float *, float *, unsigned);
-
-#ifdef MISSING
-// Function not defined in the fat archive.
-extern "C" void func_missing(float *, float *, unsigned);
-#endif
-
-#pragma omp end declare target
-
-int main() {
-  float in[N], out[N], sum = 0;
-  unsigned i;
-
-#pragma omp parallel for
-  for (i = 0; i < N; ++i) {
-    in[i] = i;
-  }
-
-  func_present(in, out, N); // Returns out[i] = a[i] * 0
-
-#ifdef MISSING
-  func_missing(in, out, N); // Should throw an error here
-#endif
-
-#pragma omp parallel for reduction(+ \
-                                   : sum)
-  for (i = 0; i < N; ++i)
-    sum += out[i];
-
-  if (!sum)
-    return 0;
-  return sum;
-}
-
-#endif
-
-/***********************************************
-   Steps to create Fat Archive (libFatArchive.a)
-************************************************
-***************** File: func_1.c ***************
-void func_present(float* in, float* out, unsigned n){
-  unsigned i;
-  #pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
-  for(i=0; i<n; ++i){
-    out[i] = in[i] * 0;
-  }
-}
-*************************************************
-1. Compile source file(s) to generate object file(s)
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
-    clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
-    clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o
-
-2. Create a fat archive by combining all the object file(s)
-    llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
-************************************************/

diff  --git a/clang/test/Driver/fat_archive_nvptx.cpp b/clang/test/Driver/fat_archive_nvptx.cpp
deleted file mode 100644
index 72e20d00651e7..0000000000000
--- a/clang/test/Driver/fat_archive_nvptx.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// REQUIRES: clang-driver
-// REQUIRES: x86-registered-target
-// REQUIRES: nvptx-registered-target
-
-// See the steps to create a fat archive are given at the end of the file.
-
-// Given a FatArchive, clang-offload-bundler should be called to create a
-// device specific archive, which should be passed to clang-nvlink-wrapper.
-// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=nvptx64 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
-// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
-// CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
-// CHECK: ld"{{.*}}" "-L{{.*}}/Inputs/openmp_static_device_link" "{{.*}} "-lFatArchive" "{{.*}}" "-lomp{{.*}}-lomptarget"
-// expected-no-diagnostics
-
-#ifndef HEADER
-#define HEADER
-
-#define N 10
-
-#pragma omp declare target
-// Functions defined in Fat Archive.
-extern "C" void func_present(float *, float *, unsigned);
-
-#ifdef MISSING
-// Function not defined in the fat archive.
-extern "C" void func_missing(float *, float *, unsigned);
-#endif
-
-#pragma omp end declare target
-
-int main() {
-  float in[N], out[N], sum = 0;
-  unsigned i;
-
-#pragma omp parallel for
-  for (i = 0; i < N; ++i) {
-    in[i] = i;
-  }
-
-  func_present(in, out, N); // Returns out[i] = a[i] * 0
-
-#ifdef MISSING
-  func_missing(in, out, N); // Should throw an error here
-#endif
-
-#pragma omp parallel for reduction(+ \
-                                   : sum)
-  for (i = 0; i < N; ++i)
-    sum += out[i];
-
-  if (!sum)
-    return 0;
-  return sum;
-}
-
-#endif
-
-/***********************************************
-   Steps to create Fat Archive (libFatArchive.a)
-************************************************
-***************** File: func_1.c ***************
-void func_present(float* in, float* out, unsigned n){
-  unsigned i;
-  #pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
-  for(i=0; i<n; ++i){
-    out[i] = in[i] * 0;
-  }
-}
-*************************************************
-1. Compile source file(s) to generate object file(s)
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
-    clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
-    clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
-    clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o
-
-2. Create a fat archive by combining all the object file(s)
-    llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
-************************************************/

diff  --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
index 0dbb75f67b289..522b5d33341fb 100644
--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -180,28 +180,6 @@ struct OffloadTargetInfo {
   }
 };
 
-static StringRef getDeviceFileExtension(StringRef Device) {
-  if (Device.contains("gfx"))
-    return ".bc";
-  if (Device.contains("sm_"))
-    return ".cubin";
-
-  WithColor::warning() << "Could not determine extension for archive"
-                          "members, using \".o\"\n";
-  return ".o";
-}
-
-static std::string getDeviceLibraryFileName(StringRef BundleFileName,
-                                            StringRef Device) {
-  StringRef LibName = sys::path::stem(BundleFileName);
-  StringRef Extension = getDeviceFileExtension(Device);
-
-  std::string Result;
-  Result += LibName;
-  Result += Extension;
-  return Result;
-}
-
 /// Generic file handler interface.
 class FileHandler {
 public:
@@ -1251,9 +1229,7 @@ static Error UnbundleArchive() {
           BundledObjectFileName.assign(BundledObjectFile);
           auto OutputBundleName =
               Twine(llvm::sys::path::stem(BundledObjectFileName) + "-" +
-                    CodeObject +
-                    getDeviceLibraryFileName(BundledObjectFileName,
-                                             CodeObjectInfo.GPUArch))
+                    CodeObject)
                   .str();
           // Replace ':' in optional target feature list with '_' to ensure
           // cross-platform validity.