[llvm-branch-commits] [llvm] eef17cb - Merge branch 'main' into revert-154018-fix/153745

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Aug 19 09:49:09 PDT 2025


Author: Oleksandr T.
Date: 2025-08-19T19:48:53+03:00
New Revision: eef17cb4dc546d528d5a80d8f681bedd83766891

URL: https://github.com/llvm/llvm-project/commit/eef17cb4dc546d528d5a80d8f681bedd83766891
DIFF: https://github.com/llvm/llvm-project/commit/eef17cb4dc546d528d5a80d8f681bedd83766891.diff

LOG: Merge branch 'main' into revert-154018-fix/153745

Added: 
    clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl
    clang/test/CodeGenHLSL/resources/res-array-local1.hlsl
    clang/test/CodeGenHLSL/resources/res-array-local2.hlsl
    clang/test/CodeGenHLSL/resources/res-array-local3.hlsl
    llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
    llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
    llvm/test/Other/offload-wrapper.ll
    llvm/tools/llvm-offload-wrapper/CMakeLists.txt
    llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp

Modified: 
    clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
    clang-tools-extra/clangd/AST.cpp
    clang-tools-extra/clangd/XRefs.cpp
    clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
    clang/docs/ClangLinkerWrapper.rst
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/DiagnosticDriverKinds.td
    clang/include/clang/Driver/ToolChain.h
    clang/lib/Driver/ToolChain.cpp
    clang/lib/Driver/ToolChains/Gnu.cpp
    clang/lib/Driver/ToolChains/Gnu.h
    clang/lib/Driver/ToolChains/Hurd.cpp
    clang/lib/Driver/ToolChains/Linux.cpp
    clang/lib/Driver/ToolChains/Managarm.cpp
    lld/test/wasm/pie.s
    lld/wasm/SyntheticSections.cpp
    llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
    llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CMakeLists.txt
    llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
    llvm/test/CodeGen/NVPTX/lower-args.ll
    llvm/test/CodeGen/NVPTX/lower-byval-args.ll
    polly/lib/Analysis/ScopBuilder.cpp

Removed: 
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/include/c++/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/include/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
    clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/include/c++/.keep
    clang/test/Driver/gcc-toolchain-libstdcxx.cpp


################################################################################
diff  --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
index 4b495e3877000..cda9c4e7a6e58 100644
--- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
@@ -188,7 +188,7 @@ static bool isKnownToHaveValue(const Expr &Cond, const ASTContext &Ctx,
 /// \return true iff all `CallExprs` visited have callees; false otherwise
 ///         indicating there is an unresolved indirect call.
 static bool populateCallees(const Stmt *StmtNode,
-                            llvm::SmallSet<const Decl *, 16> &Callees) {
+                            llvm::SmallPtrSet<const Decl *, 16> &Callees) {
   if (const auto *Call = dyn_cast<CallExpr>(StmtNode)) {
     const Decl *Callee = Call->getDirectCallee();
 
@@ -212,7 +212,7 @@ static bool populateCallees(const Stmt *StmtNode,
 /// returns true iff `SCC` contains `Func` and its' function set overlaps with
 /// `Callees`
 static bool overlap(ArrayRef<CallGraphNode *> SCC,
-                    const llvm::SmallSet<const Decl *, 16> &Callees,
+                    const llvm::SmallPtrSet<const Decl *, 16> &Callees,
                     const Decl *Func) {
   bool ContainsFunc = false, Overlap = false;
 
@@ -264,7 +264,7 @@ static bool hasRecursionOverStaticLoopCondVariables(const Expr *Cond,
   if (!hasStaticLocalVariable(Cond))
     return false;
 
-  llvm::SmallSet<const Decl *, 16> CalleesInLoop;
+  llvm::SmallPtrSet<const Decl *, 16> CalleesInLoop;
 
   if (!populateCallees(LoopStmt, CalleesInLoop)) {
     // If there are unresolved indirect calls, we assume there could

diff  --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp
index 82aee4c84d074..2f46ecc92576c 100644
--- a/clang-tools-extra/clangd/AST.cpp
+++ b/clang-tools-extra/clangd/AST.cpp
@@ -985,7 +985,7 @@ resolveForwardingParameters(const FunctionDecl *D, unsigned MaxDepth) {
     // Recurse on pack parameters
     size_t Depth = 0;
     const FunctionDecl *CurrentFunction = D;
-    llvm::SmallSet<const FunctionTemplateDecl *, 4> SeenTemplates;
+    llvm::SmallPtrSet<const FunctionTemplateDecl *, 4> SeenTemplates;
     if (const auto *Template = D->getPrimaryTemplate()) {
       SeenTemplates.insert(Template);
     }

diff  --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 11ee51072dccf..a253a630a48cc 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -1876,7 +1876,7 @@ static void fillSubTypes(const SymbolID &ID,
   });
 }
 
-using RecursionProtectionSet = llvm::SmallSet<const CXXRecordDecl *, 4>;
+using RecursionProtectionSet = llvm::SmallPtrSet<const CXXRecordDecl *, 4>;
 
 // Extracts parents from AST and populates the type hierarchy item.
 static void fillSuperTypes(const CXXRecordDecl &CXXRD, llvm::StringRef TUPath,

diff  --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
index 134ae89288300..bc9a790232507 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
@@ -181,7 +181,7 @@ struct ExtractionZone {
   bool requiresHoisting(const SourceManager &SM,
                         const HeuristicResolver *Resolver) const {
     // First find all the declarations that happened inside extraction zone.
-    llvm::SmallSet<const Decl *, 1> DeclsInExtZone;
+    llvm::SmallPtrSet<const Decl *, 1> DeclsInExtZone;
     for (auto *RootStmt : RootStmts) {
       findExplicitReferences(
           RootStmt,

diff  --git a/clang/docs/ClangLinkerWrapper.rst b/clang/docs/ClangLinkerWrapper.rst
index e69cdba434c93..eb38d2b8fb5ee 100644
--- a/clang/docs/ClangLinkerWrapper.rst
+++ b/clang/docs/ClangLinkerWrapper.rst
@@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
     --v                    Display the version number and exit
     --                     The separator for the wrapped linker arguments
 
+The linker wrapper will generate the appropriate runtime calls to register the
+generated device binary with the offloading runtime. To do this step manually we
+provide the ``llvm-offload-wrapper`` utility.
+
 Relocatable Linking
 ===================
 

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 8ee1586e72e01..2a4c5ab4cf0cb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -37,22 +37,6 @@ latest release, please see the `Clang Web Site <https://clang.llvm.org>`_ or the
 Potentially Breaking Changes
 ============================
 
-- Clang will now emit a warning if the auto-detected GCC installation
-  directory (i.e. the one with the largest version number) does not
-  contain libstdc++ include directories although a "complete" GCC
-  installation directory containing the include directories is
-  available. It is planned to change the auto-detection to prefer the
-  "complete" directory in the future.  The warning will disappear if
-  the libstdc++ include directories are either installed or removed
-  for all GCC installation directories considered by the
-  auto-detection; see the output of ``clang -v`` for a list of those
-  directories. If the GCC installations cannot be modified and
-  maintaining the current choice of the auto-detection is desired, the
-  GCC installation directory can be selected explicitly using the
-  ``--gcc-install-dir`` command line argument. This will silence the
-  warning. It can also be disabled using the
-  ``-Wno-gcc-install-dir-libstdcxx`` command line flag.
-
 C/C++ Language Potentially Breaking Changes
 -------------------------------------------
 

diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index b8c7c6e8d6909..6df8f9932f30f 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -885,9 +885,4 @@ def warn_drv_openacc_without_cir
     : Warning<"OpenACC directives will result in no runtime behavior; use "
               "-fclangir to enable runtime effect">,
       InGroup<SourceUsesOpenACC>;
-
-def warn_drv_gcc_install_dir_libstdcxx : Warning<
-    "future releases of the clang compiler will prefer GCC installations "
-    "containing libstdc++ include directories; '%0' would be chosen over '%1'">,
-    InGroup<DiagGroup<"gcc-install-dir-libstdcxx">>;
 }

diff  --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 1425714d34110..243056360370f 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -224,6 +224,9 @@ class ToolChain {
   static void addSystemFrameworkInclude(const llvm::opt::ArgList &DriverArgs,
                                         llvm::opt::ArgStringList &CC1Args,
                                         const Twine &Path);
+  static void addSystemInclude(const llvm::opt::ArgList &DriverArgs,
+                               llvm::opt::ArgStringList &CC1Args,
+                               const Twine &Path);
   static void addExternCSystemInclude(const llvm::opt::ArgList &DriverArgs,
                                       llvm::opt::ArgStringList &CC1Args,
                                       const Twine &Path);
@@ -243,9 +246,6 @@ class ToolChain {
   ///@}
 
 public:
-  static void addSystemInclude(const llvm::opt::ArgList &DriverArgs,
-                               llvm::opt::ArgStringList &CC1Args,
-                               const Twine &Path);
   virtual ~ToolChain();
 
   // Accessors

diff  --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 65b36217a940f..7667dbddb0ca2 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1409,6 +1409,13 @@ void ToolChain::addSystemFrameworkInclude(const llvm::opt::ArgList &DriverArgs,
   CC1Args.push_back(DriverArgs.MakeArgString(Path));
 }
 
+/// Utility function to add a system include directory to CC1 arguments.
+void ToolChain::addSystemInclude(const ArgList &DriverArgs,
+                                 ArgStringList &CC1Args, const Twine &Path) {
+  CC1Args.push_back("-internal-isystem");
+  CC1Args.push_back(DriverArgs.MakeArgString(Path));
+}
+
 /// Utility function to add a system include directory with extern "C"
 /// semantics to CC1 arguments.
 ///
@@ -1431,14 +1438,6 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs,
     addExternCSystemInclude(DriverArgs, CC1Args, Path);
 }
 
-/// Utility function to add a system include directory to CC1 arguments.
-/*static*/ void ToolChain::addSystemInclude(const ArgList &DriverArgs,
-                                            ArgStringList &CC1Args,
-                                            const Twine &Path) {
-  CC1Args.push_back("-internal-isystem");
-  CC1Args.push_back(DriverArgs.MakeArgString(Path));
-}
-
 /// Utility function to add a list of system framework directories to CC1.
 void ToolChain::addSystemFrameworkIncludes(const ArgList &DriverArgs,
                                            ArgStringList &CC1Args,

diff  --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 3dade2bdf2277..01b146db24f3e 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2123,11 +2123,10 @@ void Generic_GCC::GCCInstallationDetector::init(
       StringRef TripleText =
           llvm::sys::path::filename(llvm::sys::path::parent_path(InstallDir));
 
-      SelectedInstallation.Version = GCCVersion::Parse(VersionText);
-      SelectedInstallation.GCCTriple.setTriple(TripleText);
-      SelectedInstallation.GCCInstallPath = std::string(InstallDir);
-      SelectedInstallation.GCCParentLibPath =
-          SelectedInstallation.GCCInstallPath + "/../../..";
+      Version = GCCVersion::Parse(VersionText);
+      GCCTriple.setTriple(TripleText);
+      GCCInstallPath = std::string(InstallDir);
+      GCCParentLibPath = GCCInstallPath + "/../../..";
       IsValid = true;
     }
     return;
@@ -2187,7 +2186,7 @@ void Generic_GCC::GCCInstallationDetector::init(
   // Loop over the various components which exist and select the best GCC
   // installation available. GCC installs are ranked by version number.
   const GCCVersion VersionZero = GCCVersion::Parse("0.0.0");
-  SelectedInstallation.Version = VersionZero;
+  Version = VersionZero;
   for (const std::string &Prefix : Prefixes) {
     auto &VFS = D.getVFS();
     if (!VFS.exists(Prefix))
@@ -2215,7 +2214,7 @@ void Generic_GCC::GCCInstallationDetector::init(
     }
 
     // Skip other prefixes once a GCC installation is found.
-    if (SelectedInstallation.Version > VersionZero)
+    if (Version > VersionZero)
       break;
   }
 }
@@ -2224,17 +2223,14 @@ void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const {
   for (const auto &InstallPath : CandidateGCCInstallPaths)
     OS << "Found candidate GCC installation: " << InstallPath << "\n";
 
-  if (!SelectedInstallation.GCCInstallPath.empty())
-    OS << "Selected GCC installation: " << SelectedInstallation.GCCInstallPath
-       << "\n";
+  if (!GCCInstallPath.empty())
+    OS << "Selected GCC installation: " << GCCInstallPath << "\n";
 
   for (const auto &Multilib : Multilibs)
     OS << "Candidate multilib: " << Multilib << "\n";
 
-  if (Multilibs.size() != 0 ||
-      !SelectedInstallation.SelectedMultilib.isDefault())
-    OS << "Selected multilib: " << SelectedInstallation.SelectedMultilib
-       << "\n";
+  if (Multilibs.size() != 0 || !SelectedMultilib.isDefault())
+    OS << "Selected multilib: " << SelectedMultilib << "\n";
 }
 
 bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
@@ -2772,50 +2768,14 @@ bool Generic_GCC::GCCInstallationDetector::ScanGCCForMultilibs(
   }
 
   Multilibs = Detected.Multilibs;
-  SelectedInstallation.SelectedMultilib =
-      Detected.SelectedMultilibs.empty() ? Multilib()
-                                         : Detected.SelectedMultilibs.back();
+  SelectedMultilib = Detected.SelectedMultilibs.empty()
+                         ? Multilib()
+                         : Detected.SelectedMultilibs.back();
   BiarchSibling = Detected.BiarchSibling;
 
   return true;
 }
 
-bool Generic_GCC::GCCInstallationDetector::SelectGCCInstallationDirectory(
-    const SmallVector<Generic_GCC::GCCInstallCandidate, 3> &Installations,
-    const ArgList &Args,
-    Generic_GCC::GCCInstallCandidate &SelectedInstallation) const {
-  if (Installations.empty())
-    return false;
-
-  SelectedInstallation =
-      *max_element(Installations, [](const auto &Max, const auto &I) {
-        return I.Version > Max.Version;
-      });
-
-  // FIXME Start selecting installation with libstdc++ in clang 22,
-  // using the current way of selecting the installation as a fallback
-  // only.  For now, warn if the installation with libstdc++ 
diff ers
-  // from SelectedInstallation.
-  const GCCInstallCandidate *InstallWithIncludes = nullptr;
-  for (const auto &I : Installations) {
-    if ((!InstallWithIncludes || I.Version > InstallWithIncludes->Version) &&
-        GCCInstallationHasLibStdcxxIncludePaths(I, Args))
-      InstallWithIncludes = &I;
-  }
-
-  if (InstallWithIncludes && SelectedInstallation.GCCInstallPath !=
-                                 InstallWithIncludes->GCCInstallPath)
-    D.Diag(diag::warn_drv_gcc_install_dir_libstdcxx)
-        << InstallWithIncludes->GCCInstallPath
-        << SelectedInstallation.GCCInstallPath;
-
-  // TODO Warn if SelectedInstallation does not contain libstdc++ includes
-  // although compiler flags indicate that it is required (C++ compilation,
-  // libstdc++ not explicitly disabled).
-
-  return true;
-}
-
 void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
     const llvm::Triple &TargetTriple, const ArgList &Args,
     const std::string &LibDir, StringRef CandidateTriple,
@@ -2845,7 +2805,6 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
        TargetTriple.getVendor() == llvm::Triple::Freescale ||
            TargetTriple.getVendor() == llvm::Triple::OpenEmbedded}};
 
-  SmallVector<GCCInstallCandidate, 3> Installations;
   for (auto &Suffix : Suffixes) {
     if (!Suffix.Active)
       continue;
@@ -2863,31 +2822,23 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
           continue; // Saw this path before; no need to look at it again.
       if (CandidateVersion.isOlderThan(4, 1, 1))
         continue;
-      if (CandidateVersion <= SelectedInstallation.Version && IsValid)
+      if (CandidateVersion <= Version)
         continue;
 
       if (!ScanGCCForMultilibs(TargetTriple, Args, LI->path(),
                                NeedsBiarchSuffix))
         continue;
 
-      GCCInstallCandidate Installation;
-      Installation.Version = CandidateVersion;
-      Installation.GCCTriple.setTriple(CandidateTriple);
+      Version = CandidateVersion;
+      GCCTriple.setTriple(CandidateTriple);
       // FIXME: We hack together the directory name here instead of
       // using LI to ensure stable path separators across Windows and
       // Linux.
-      Installation.GCCInstallPath =
-          (LibDir + "/" + LibSuffix + "/" + VersionText).str();
-      Installation.GCCParentLibPath =
-          (Installation.GCCInstallPath + "/../" + Suffix.ReversePath).str();
-      Installation.SelectedMultilib = getMultilib();
-
-      Installations.push_back(Installation);
+      GCCInstallPath = (LibDir + "/" + LibSuffix + "/" + VersionText).str();
+      GCCParentLibPath = (GCCInstallPath + "/../" + Suffix.ReversePath).str();
+      IsValid = true;
     }
   }
-
-  IsValid |=
-      SelectGCCInstallationDirectory(Installations, Args, SelectedInstallation);
 }
 
 bool Generic_GCC::GCCInstallationDetector::ScanGentooConfigs(
@@ -2965,12 +2916,10 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig(
                                    NeedsBiarchSuffix))
             continue;
 
-          SelectedInstallation.Version =
-              GCCVersion::Parse(ActiveVersion.second);
-          SelectedInstallation.GCCInstallPath = GentooPath;
-          SelectedInstallation.GCCParentLibPath =
-              GentooPath + std::string("/../../..");
-          SelectedInstallation.GCCTriple.setTriple(ActiveVersion.first);
+          Version = GCCVersion::Parse(ActiveVersion.second);
+          GCCInstallPath = GentooPath;
+          GCCParentLibPath = GentooPath + std::string("/../../..");
+          GCCTriple.setTriple(ActiveVersion.first);
           IsValid = true;
           return true;
         }
@@ -3173,9 +3122,8 @@ void Generic_GCC::AddMultilibIncludeArgs(const ArgList &DriverArgs,
   // gcc TOOL_INCLUDE_DIR.
   const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
   std::string LibPath(GCCInstallation.getParentLibPath());
-  ToolChain::addSystemInclude(DriverArgs, CC1Args,
-                              Twine(LibPath) + "/../" + GCCTriple.str() +
-                                  "/include");
+  addSystemInclude(DriverArgs, CC1Args,
+                   Twine(LibPath) + "/../" + GCCTriple.str() + "/include");
 
   const auto &Callback = Multilibs.includeDirsCallback();
   if (Callback) {
@@ -3262,14 +3210,12 @@ Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
     return;
 }
 
-static bool addLibStdCXXIncludePaths(llvm::vfs::FileSystem &vfs,
-                                     Twine IncludeDir, StringRef Triple,
-                                     Twine IncludeSuffix,
-                                     const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args,
-                                     bool DetectDebian = false) {
-
-  if (!vfs.exists(IncludeDir))
+bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
+                                           Twine IncludeSuffix,
+                                           const llvm::opt::ArgList &DriverArgs,
+                                           llvm::opt::ArgStringList &CC1Args,
+                                           bool DetectDebian) const {
+  if (!getVFS().exists(IncludeDir))
     return false;
 
   // Debian native gcc uses g++-multiarch-incdir.
diff  which uses
@@ -3281,48 +3227,39 @@ static bool addLibStdCXXIncludePaths(llvm::vfs::FileSystem &vfs,
   std::string Path =
       (Include + "/" + Triple + Dir.substr(Include.size()) + IncludeSuffix)
           .str();
-  if (DetectDebian && !vfs.exists(Path))
+  if (DetectDebian && !getVFS().exists(Path))
     return false;
 
   // GPLUSPLUS_INCLUDE_DIR
-  ToolChain::addSystemInclude(DriverArgs, CC1Args, IncludeDir);
+  addSystemInclude(DriverArgs, CC1Args, IncludeDir);
   // GPLUSPLUS_TOOL_INCLUDE_DIR. If Triple is not empty, add a target-dependent
   // include directory.
   if (DetectDebian)
-    ToolChain::addSystemInclude(DriverArgs, CC1Args, Path);
+    addSystemInclude(DriverArgs, CC1Args, Path);
   else if (!Triple.empty())
-    ToolChain::addSystemInclude(DriverArgs, CC1Args,
-                                IncludeDir + "/" + Triple + IncludeSuffix);
+    addSystemInclude(DriverArgs, CC1Args,
+                     IncludeDir + "/" + Triple + IncludeSuffix);
   // GPLUSPLUS_BACKWARD_INCLUDE_DIR
-  ToolChain::addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
+  addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
   return true;
 }
 
-bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
-                                           Twine IncludeSuffix,
-                                           const llvm::opt::ArgList &DriverArgs,
-                                           llvm::opt::ArgStringList &CC1Args,
-                                           bool DetectDebian) const {
-  return ::addLibStdCXXIncludePaths(getVFS(), IncludeDir, Triple, IncludeSuffix,
-                                    DriverArgs, CC1Args, DetectDebian);
-}
-
-bool Generic_GCC::GCCInstallCandidate::addGCCLibStdCxxIncludePaths(
-    llvm::vfs::FileSystem &vfs, const llvm::opt::ArgList &DriverArgs,
-    llvm::opt::ArgStringList &CC1Args, StringRef DebianMultiarch) const {
+bool Generic_GCC::addGCCLibStdCxxIncludePaths(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    StringRef DebianMultiarch) const {
+  assert(GCCInstallation.isValid());
 
   // By default, look for the C++ headers in an include directory adjacent to
   // the lib directory of the GCC installation. Note that this is expect to be
   // equivalent to '/usr/include/c++/X.Y' in almost all cases.
-  StringRef LibDir = getParentLibPath();
-  StringRef InstallDir = getInstallPath();
-  StringRef TripleStr = getTriple().str();
-  const Multilib &Multilib = getMultilib();
-  const GCCVersion &Version = getVersion();
+  StringRef LibDir = GCCInstallation.getParentLibPath();
+  StringRef InstallDir = GCCInstallation.getInstallPath();
+  StringRef TripleStr = GCCInstallation.getTriple().str();
+  const Multilib &Multilib = GCCInstallation.getMultilib();
+  const GCCVersion &Version = GCCInstallation.getVersion();
 
   // Try /../$triple/include/c++/$version (gcc --print-multiarch is not empty).
-  if (::addLibStdCXXIncludePaths(
-          vfs,
+  if (addLibStdCXXIncludePaths(
           LibDir.str() + "/../" + TripleStr + "/include/c++/" + Version.Text,
           TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args))
     return true;
@@ -3330,24 +3267,22 @@ bool Generic_GCC::GCCInstallCandidate::addGCCLibStdCxxIncludePaths(
   // Try /gcc/$triple/$version/include/c++/ (gcc --print-multiarch is not
   // empty). Like above but for GCC built with
   // --enable-version-specific-runtime-libs.
-  if (::addLibStdCXXIncludePaths(vfs,
-                                 LibDir.str() + "/gcc/" + TripleStr + "/" +
-                                     Version.Text + "/include/c++/",
-                                 TripleStr, Multilib.includeSuffix(),
-                                 DriverArgs, CC1Args))
+  if (addLibStdCXXIncludePaths(LibDir.str() + "/gcc/" + TripleStr + "/" +
+                                   Version.Text + "/include/c++/",
+                               TripleStr, Multilib.includeSuffix(), DriverArgs,
+                               CC1Args))
     return true;
 
   // Detect Debian g++-multiarch-incdir.
diff .
-  if (::addLibStdCXXIncludePaths(
-          vfs, LibDir.str() + "/../include/c++/" + Version.Text,
-          DebianMultiarch, Multilib.includeSuffix(), DriverArgs, CC1Args,
-          /*Debian=*/true))
+  if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
+                               DebianMultiarch, Multilib.includeSuffix(),
+                               DriverArgs, CC1Args, /*Debian=*/true))
     return true;
 
   // Try /../include/c++/$version (gcc --print-multiarch is empty).
-  if (::addLibStdCXXIncludePaths(
-          vfs, LibDir.str() + "/../include/c++/" + Version.Text, TripleStr,
-          Multilib.includeSuffix(), DriverArgs, CC1Args))
+  if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
+                               TripleStr, Multilib.includeSuffix(), DriverArgs,
+                               CC1Args))
     return true;
 
   // Otherwise, fall back on a bunch of options which don't use multiarch
@@ -3362,50 +3297,20 @@ bool Generic_GCC::GCCInstallCandidate::addGCCLibStdCxxIncludePaths(
   };
 
   for (const auto &IncludePath : LibStdCXXIncludePathCandidates) {
-    if (::addLibStdCXXIncludePaths(vfs, IncludePath, TripleStr,
-                                   Multilib.includeSuffix(), DriverArgs,
-                                   CC1Args))
+    if (addLibStdCXXIncludePaths(IncludePath, TripleStr,
+                                 Multilib.includeSuffix(), DriverArgs, CC1Args))
       return true;
   }
   return false;
 }
 
-bool Generic_GCC::GCCInstallationDetector::
-    GCCInstallationHasLibStdcxxIncludePaths(
-        const GCCInstallCandidate &GCCInstallation,
-        const llvm::opt::ArgList &DriverArgs) const {
-  StringRef DebianMultiarch =
-      TripleToDebianMultiarch(GCCInstallation.getTriple());
-
-  // The following function checks for libstdc++ include paths and
-  // adds them to the provided argument list.  Here we just need the
-  // check.
-  llvm::opt::ArgStringList dummyCC1Args;
-  return GCCInstallation.addGCCLibStdCxxIncludePaths(
-      D.getVFS(), DriverArgs, dummyCC1Args, DebianMultiarch);
-}
-
-bool Generic_GCC::addGCCLibStdCxxIncludePaths(
-    const llvm::opt::ArgList &DriverArgs,
-    llvm::opt::ArgStringList &CC1Args) const {
-  assert(GCCInstallation.isValid());
-
-  // Detect Debian g++-multiarch-incdir.
diff .
-  StringRef DebianMultiarch =
-      GCCInstallation.TripleToDebianMultiarch(GCCInstallation.getTriple());
-
-  return GCCInstallation.getSelectedInstallation().addGCCLibStdCxxIncludePaths(
-      getVFS(), DriverArgs, CC1Args, DebianMultiarch);
-}
-
 void
 Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
                                       llvm::opt::ArgStringList &CC1Args) const {
-  if (!GCCInstallation.isValid())
-    return;
-
-  GCCInstallation.getSelectedInstallation().addGCCLibStdCxxIncludePaths(
-      getVFS(), DriverArgs, CC1Args, GCCInstallation.getTriple().str());
+  if (GCCInstallation.isValid()) {
+    addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args,
+                                GCCInstallation.getTriple().str());
+  }
 }
 
 llvm::opt::DerivedArgList *

diff  --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h
index 5fe143b4aa035..ad817452e5933 100644
--- a/clang/lib/Driver/ToolChains/Gnu.h
+++ b/clang/lib/Driver/ToolChains/Gnu.h
@@ -184,39 +184,6 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
     bool operator>=(const GCCVersion &RHS) const { return !(*this < RHS); }
   };
 
-  struct GCCInstallCandidate {
-    // FIXME: These might be better as path objects.
-    std::string GCCInstallPath;
-    std::string GCCParentLibPath;
-
-    llvm::Triple GCCTriple;
-
-    /// The primary multilib appropriate for the given flags.
-    Multilib SelectedMultilib;
-
-    GCCVersion Version;
-
-    /// Get the GCC triple for the detected install.
-    const llvm::Triple &getTriple() const { return GCCTriple; }
-
-    /// Get the detected GCC installation path.
-    StringRef getInstallPath() const { return GCCInstallPath; }
-
-    /// Get the detected GCC parent lib path.
-    StringRef getParentLibPath() const { return GCCParentLibPath; }
-
-    /// Get the detected Multilib
-    const Multilib &getMultilib() const { return SelectedMultilib; }
-
-    /// Get the detected GCC version string.
-    const GCCVersion &getVersion() const { return Version; }
-
-    bool addGCCLibStdCxxIncludePaths(llvm::vfs::FileSystem &vfs,
-                                     const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args,
-                                     StringRef DebianMultiarch) const;
-  };
-
   /// This is a class to find a viable GCC installation for Clang to
   /// use.
   ///
@@ -225,15 +192,21 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
   /// Driver, and has logic for fuzzing that where appropriate.
   class GCCInstallationDetector {
     bool IsValid;
-
+    llvm::Triple GCCTriple;
     const Driver &D;
 
-    GCCInstallCandidate SelectedInstallation;
+    // FIXME: These might be better as path objects.
+    std::string GCCInstallPath;
+    std::string GCCParentLibPath;
 
+    /// The primary multilib appropriate for the given flags.
+    Multilib SelectedMultilib;
     /// On Biarch systems, this corresponds to the default multilib when
     /// targeting the non-default multilib. Otherwise, it is empty.
     std::optional<Multilib> BiarchSibling;
 
+    GCCVersion Version;
+
     // We retain the list of install paths that were considered and rejected in
     // order to print out detailed information in verbose mode.
     std::set<std::string> CandidateGCCInstallPaths;
@@ -245,50 +218,23 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
     const std::string GentooConfigDir = "/etc/env.d/gcc";
 
   public:
-    /// Function for converting a triple to a Debian multiarch.  The
-    /// toolchains use this to adjust the target specific component of
-    /// include paths for Debian.
-    std::function<StringRef(const llvm::Triple &)> TripleToDebianMultiarch =
-        [](const llvm::Triple &T) {
-          StringRef S = T.str();
-          return S;
-        };
-
     explicit GCCInstallationDetector(const Driver &D) : IsValid(false), D(D) {}
-
     void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args);
 
-    // TODO Replace isValid by changing SelectedInstallation into
-    // std::optional<SelectedInstallation>
-    // and move all accessors for fields of GCCInstallCandidate into
-    // that struct.
-
     /// Check whether we detected a valid GCC install.
     bool isValid() const { return IsValid; }
 
-    const GCCInstallCandidate &getSelectedInstallation() const {
-      return SelectedInstallation;
-    }
-
     /// Get the GCC triple for the detected install.
-    const llvm::Triple &getTriple() const {
-      return SelectedInstallation.GCCTriple;
-    }
+    const llvm::Triple &getTriple() const { return GCCTriple; }
 
     /// Get the detected GCC installation path.
-    StringRef getInstallPath() const {
-      return SelectedInstallation.GCCInstallPath;
-    }
+    StringRef getInstallPath() const { return GCCInstallPath; }
 
     /// Get the detected GCC parent lib path.
-    StringRef getParentLibPath() const {
-      return SelectedInstallation.GCCParentLibPath;
-    }
+    StringRef getParentLibPath() const { return GCCParentLibPath; }
 
     /// Get the detected Multilib
-    const Multilib &getMultilib() const {
-      return SelectedInstallation.SelectedMultilib;
-    }
+    const Multilib &getMultilib() const { return SelectedMultilib; }
 
     /// Get the whole MultilibSet
     const MultilibSet &getMultilibs() const { return Multilibs; }
@@ -298,9 +244,7 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
     bool getBiarchSibling(Multilib &M) const;
 
     /// Get the detected GCC version string.
-    const GCCVersion &getVersion() const {
-      return SelectedInstallation.Version;
-    }
+    const GCCVersion &getVersion() const { return Version; }
 
     /// Print information about the detected GCC installation.
     void print(raw_ostream &OS) const;
@@ -318,19 +262,6 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
                                SmallVectorImpl<std::string> &Prefixes,
                                StringRef SysRoot);
 
-    /// Checks if the \p GCCInstallation has libstdc++ include
-    /// directories.
-    bool GCCInstallationHasLibStdcxxIncludePaths(
-        const GCCInstallCandidate &GCCInstallation,
-        const llvm::opt::ArgList &DriverArgs) const;
-
-    /// Select a GCC installation directory from \p Installations and
-    /// set \p SelectedInstallation accordingly.
-    bool SelectGCCInstallationDirectory(
-        const SmallVector<GCCInstallCandidate, 3> &Installations,
-        const llvm::opt::ArgList &Args,
-        GCCInstallCandidate &SelectedInstallation) const;
-
     bool ScanGCCForMultilibs(const llvm::Triple &TargetTriple,
                              const llvm::opt::ArgList &Args, StringRef Path,
                              bool NeedsBiarchSuffix = false);
@@ -417,7 +348,8 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
                            llvm::opt::ArgStringList &CC1Args) const;
 
   bool addGCCLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
-                                   llvm::opt::ArgStringList &CC) const;
+                                   llvm::opt::ArgStringList &CC1Args,
+                                   StringRef DebianMultiarch) const;
 
   bool addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
                                 Twine IncludeSuffix,

diff  --git a/clang/lib/Driver/ToolChains/Hurd.cpp b/clang/lib/Driver/ToolChains/Hurd.cpp
index 8bcc7e6d9759e..a22a8face1797 100644
--- a/clang/lib/Driver/ToolChains/Hurd.cpp
+++ b/clang/lib/Driver/ToolChains/Hurd.cpp
@@ -71,13 +71,6 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
 
 Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     : Generic_ELF(D, Triple, Args) {
-  GCCInstallation.TripleToDebianMultiarch = [](const llvm::Triple &T) {
-    StringRef TripleStr = T.str();
-    StringRef DebianMultiarch =
-        T.getArch() == llvm::Triple::x86 ? "i386-gnu" : TripleStr;
-    return DebianMultiarch;
-  };
-
   GCCInstallation.init(Triple, Args);
   Multilibs = GCCInstallation.getMultilibs();
   SelectedMultilibs.assign({GCCInstallation.getMultilib()});
@@ -214,7 +207,12 @@ void Hurd::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
   if (!GCCInstallation.isValid())
     return;
 
-  addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args);
+  StringRef TripleStr = GCCInstallation.getTriple().str();
+  StringRef DebianMultiarch =
+      GCCInstallation.getTriple().getArch() == llvm::Triple::x86 ? "i386-gnu"
+                                                                 : TripleStr;
+
+  addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args, DebianMultiarch);
 }
 
 void Hurd::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const {

diff  --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 16e35b08cfbd6..8ac8d4eb91812 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -211,13 +211,6 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
 
 Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     : Generic_ELF(D, Triple, Args) {
-  GCCInstallation.TripleToDebianMultiarch = [](const llvm::Triple &T) {
-    StringRef TripleStr = T.str();
-    StringRef DebianMultiarch =
-        T.getArch() == llvm::Triple::x86 ? "i386-linux-gnu" : TripleStr;
-    return DebianMultiarch;
-  };
-
   GCCInstallation.init(Triple, Args);
   Multilibs = GCCInstallation.getMultilibs();
   SelectedMultilibs.assign({GCCInstallation.getMultilib()});
@@ -700,15 +693,22 @@ void Linux::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
   if (!GCCInstallation.isValid())
     return;
 
+  // Detect Debian g++-multiarch-incdir.
diff .
+  StringRef TripleStr = GCCInstallation.getTriple().str();
+  StringRef DebianMultiarch =
+      GCCInstallation.getTriple().getArch() == llvm::Triple::x86
+          ? "i386-linux-gnu"
+          : TripleStr;
+
   // Try generic GCC detection first.
-  if (Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args))
+  if (Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args,
+                                               DebianMultiarch))
     return;
 
   StringRef LibDir = GCCInstallation.getParentLibPath();
   const Multilib &Multilib = GCCInstallation.getMultilib();
   const GCCVersion &Version = GCCInstallation.getVersion();
 
-  StringRef TripleStr = GCCInstallation.getTriple().str();
   const std::string LibStdCXXIncludePathCandidates[] = {
       // Android standalone toolchain has C++ headers in yet another place.
       LibDir.str() + "/../" + TripleStr.str() + "/include/c++/" + Version.Text,

diff  --git a/clang/lib/Driver/ToolChains/Managarm.cpp b/clang/lib/Driver/ToolChains/Managarm.cpp
index da4a9072317f4..0f56f0f6aad77 100644
--- a/clang/lib/Driver/ToolChains/Managarm.cpp
+++ b/clang/lib/Driver/ToolChains/Managarm.cpp
@@ -193,8 +193,10 @@ void Managarm::addLibStdCxxIncludePaths(
   if (!GCCInstallation.isValid())
     return;
 
+  StringRef TripleStr = GCCInstallation.getTriple().str();
+
   // Try generic GCC detection.
-  addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args);
+  Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args, TripleStr);
 }
 
 SanitizerMask Managarm::getSupportedSanitizers() const {

diff  --git a/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl
new file mode 100644
index 0000000000000..d803882fd2f72
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN:   -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies handling of multi-dimensional local arrays of resources
+// when used as a function argument and local variable.
+
+// CHECK: @_ZL1A = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1B = internal global %"class.hlsl::RWBuffer" poison, align 4
+
+RWBuffer<float> A : register(u10);
+RWBuffer<float> B : register(u20);
+RWStructuredBuffer<float> Out;
+
+// NOTE: _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> and
+//       _ZN4hlsl18RWStructuredBufferIfEixEj is the subscript operator for RWStructuredBuffer<float>
+
+// CHECK: define {{.*}} float @_Z3fooA2_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([2 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %Arr)
+// CHECK-NEXT: entry:
+float foo(RWBuffer<float> Arr[2][2]) {
+// CHECK-NEXT: %[[Arr_1_Ptr:.*]] = getelementptr inbounds [2 x [2 x %"class.hlsl::RWBuffer"]], ptr %Arr, i32 0, i32 1
+// CHECK-NEXT: %[[Arr_1_1_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %[[Arr_1_Ptr]], i32 0, i32 1
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Arr_1_1_Ptr]], i32 noundef 0)
+// CHECK-NEXT: %[[Value:.*]] = load float, ptr %[[BufPtr]], align 4
+// CHECK-NEXT: ret float %[[Value]]
+  return Arr[1][1][0];
+}
+
+// CHECK: define internal void @_Z4mainv()
+// CHECK-NEXT: entry:
+[numthreads(4,1,1)]
+void main() {
+// CHECK-NEXT: %L = alloca [2 x [2 x %"class.hlsl::RWBuffer"]], align 4
+// CHECK-NEXT: %[[Tmp:.*]] = alloca [2 x [2 x %"class.hlsl::RWBuffer"]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %L, ptr align 4 @_ZL1A, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr1:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %L, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr1]], ptr align 4 @_ZL1B, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr2:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %L, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr2]], ptr align 4 @_ZL1A, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr3:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %[[Ptr2]], i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr3]], ptr align 4 @_ZL1B, i32 4, i1 false)
+  RWBuffer<float> L[2][2] = { { A, B }, { A, B } };
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp]], ptr align 4 %L, i32 16, i1 false)
+// CHECK-NEXT: %[[ReturnedValue:.*]] = call {{.*}}float @_Z3fooA2_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([2 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %[[Tmp]])
+// CHECK-NEXT: %[[OutBufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr {{.*}} @_ZL3Out, i32 noundef 0)
+// CHECK-NEXT: store float %[[ReturnedValue]], ptr %[[OutBufPtr]], align 4
+// CHECK-NEXT: ret void
+  Out[0] = foo(L);
+}

diff  --git a/clang/test/CodeGenHLSL/resources/res-array-local1.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local1.hlsl
new file mode 100644
index 0000000000000..c0d508b1395c3
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local1.hlsl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN:   -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies local arrays of resources in HLSL.
+
+// CHECK: @_ZL1A = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1B = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1C = internal global %"class.hlsl::RWBuffer" poison, align 4
+
+RWBuffer<float> A : register(u1);
+RWBuffer<float> B : register(u2);
+RWBuffer<float> C : register(u3);
+RWStructuredBuffer<float> Out : register(u0);
+
+// CHECK: define internal void @_Z4mainv()
+// CHECK-NEXT: entry:
+[numthreads(4,1,1)]
+void main() {
+// CHECK-NEXT:  %First = alloca [3 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT:  %Second = alloca [4 x %"class.hlsl::RWBuffer"], align 4
+  RWBuffer<float> First[3] = { A, B, C };
+  RWBuffer<float> Second[4];
+
+// Verify initialization of First array from an initialization list
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %First, ptr align 4 @_ZL1A, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr1:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %First, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr1]], ptr align 4 @_ZL1B, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr2:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %First, i32 2
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr2]], ptr align 4 @_ZL1C, i32 4, i1 false)
+
+// Verify default initialization of Second array, which means there is a loop iterating
+// over the array elements and calling the default constructor for each
+// CHECK-NEXT: %[[ArrayBeginPtr:.*]] = getelementptr inbounds [4 x %"class.hlsl::RWBuffer"], ptr %Second, i32 0, i32 0
+// CHECK-NEXT: %[[ArrayEndPtr:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %[[ArrayBeginPtr]], i32 4
+// CHECK-NEXT: br label %[[ArrayInitLoop:.*]]
+// CHECK: [[ArrayInitLoop]]:
+// CHECK-NEXT: %[[ArrayCurPtr:.*]] = phi ptr [ %[[ArrayBeginPtr]], %entry ], [ %[[ArrayNextPtr:.*]], %[[ArrayInitLoop]] ]
+// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1Ev(ptr {{.*}} %[[ArrayCurPtr]])
+// CHECK-NEXT: %[[ArrayNextPtr]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %[[ArrayCurPtr]], i32 1
+// CHECK-NEXT: %[[ArrayInitDone:.*]] = icmp eq ptr %[[ArrayNextPtr]], %[[ArrayEndPtr]]
+// CHECK-NEXT: br i1 %[[ArrayInitDone]], label %[[AfterArrayInit:.*]], label %[[ArrayInitLoop]]
+// CHECK: [[AfterArrayInit]]:
+
+// Initialize First[2] with C
+// CHECK: %[[Ptr3:.*]] = getelementptr inbounds [4 x %"class.hlsl::RWBuffer"], ptr %Second, i32 0, i32 2
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr3]], ptr align 4 @_ZL1C, i32 4, i1 false)
+  Second[2] = C;
+
+  // NOTE: _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float>
+
+// get First[1][0] value
+// CHECK: %[[First_1_Ptr:.*]] = getelementptr inbounds [3 x %"class.hlsl::RWBuffer"], ptr %First, i32 0, i32 1
+// CHECK: %[[BufPtr1:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[First_1_Ptr]], i32 noundef 0)
+// CHECK: %[[Value1:.*]] = load float, ptr %[[BufPtr1]], align 4
+
+// get Second[2][0] value
+// CHECK: %[[Second_2_Ptr:.*]] = getelementptr inbounds [4 x %"class.hlsl::RWBuffer"], ptr %Second, i32 0, i32 2
+// CHECK: %[[BufPtr2:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Second_2_Ptr]], i32 noundef 0)
+// CHECK: %[[Value2:.*]] = load float, ptr %[[BufPtr2]], align 4
+
+// add them
+// CHECK: %{{.*}} = fadd {{.*}} float %[[Value1]], %[[Value2]]
+  Out[0] = First[1][0] + Second[2][0];
+}

diff  --git a/clang/test/CodeGenHLSL/resources/res-array-local2.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local2.hlsl
new file mode 100644
index 0000000000000..39f3aeb66ceb5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local2.hlsl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN:   -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies handling of local arrays of resources when used as a function argument.
+
+// CHECK: @_ZL1A = internal global [3 x %"class.hlsl::RWBuffer"] poison, align 4
+
+RWBuffer<float> A[3] : register(u0);
+RWStructuredBuffer<float> Out : register(u0);
+
+// NOTE: _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> and
+//       _ZN4hlsl18RWStructuredBufferIfEixEj is the subscript operator for RWStructuredBuffer<float>
+
+// CHECK: define {{.*}} float @_Z3fooA3_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x %"class.hlsl::RWBuffer"]) align 4 %LocalA)
+// CHECK-NEXT: entry:
+float foo(RWBuffer<float> LocalA[3]) {
+// CHECK-NEXT: %[[LocalA_2_Ptr:.*]] = getelementptr inbounds [3 x %"class.hlsl::RWBuffer"], ptr %LocalA, i32 0, i32 2
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[LocalA_2_Ptr]], i32 noundef 0)
+// CHECK-NEXT: %[[Value:.*]] = load float, ptr %[[BufPtr]], align 4
+// CHECK-NEXT: ret float %[[Value]]
+  return LocalA[2][0];
+}
+
+// CHECK: define internal void @_Z4mainv()
+// CHECK-NEXT: entry:
+[numthreads(4,1,1)]
+void main() {
+// Check that the `main` function calls `foo` with a local copy of the array
+// CHECK-NEXT: %[[Tmp:.*]] = alloca [3 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp]], ptr align 4 @_ZL1A, i32 12, i1 false)
+
+// CHECK-NEXT: %[[ReturnedValue:.*]] = call {{.*}} float @_Z3fooA3_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp]])
+// CHECK-NEXT: %[[OutBufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr {{.*}} @_ZL3Out, i32 noundef 0)
+// CHECK-NEXT: store float %[[ReturnedValue]], ptr %[[OutBufPtr]], align 4
+// CHECK-NEXT: ret void
+  Out[0] = foo(A);
+}

diff  --git a/clang/test/CodeGenHLSL/resources/res-array-local3.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local3.hlsl
new file mode 100644
index 0000000000000..e5bcdc651254f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local3.hlsl
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN:   -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies handling of local arrays of resources when used
+// as a function argument that is modified inside the function.
+
+// CHECK: @_ZL1X = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1Y = internal global %"class.hlsl::RWBuffer" poison, align 4
+
+RWBuffer<int> X : register(u0);
+RWBuffer<int> Y : register(u1);
+
+// CHECK: define {{.*}} @_Z6SomeFnA2_N4hlsl8RWBufferIiEEji(
+// CHECK-SAME: ptr noundef byval([2 x %"class.hlsl::RWBuffer"]) align 4 %B, i32 noundef %Idx, i32 noundef %Val0)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %[[Idx_addr:.*]] = alloca i32, align 4
+// CHECK-NEXT: %[[Val0_addr:.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 %Idx, ptr %[[Idx_addr]], align 4
+// CHECK-NEXT: store i32 %Val0, ptr %[[Val0_addr]], align 4
+void SomeFn(RWBuffer<int> B[2], uint Idx, int Val0) {
+
+// CHECK-NEXT: %[[B_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %B, i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[B_0_Ptr]], ptr align 4 @_ZL1Y, i32 4, i1 false)
+  B[0] = Y;
+
+// NOTE: _ZN4hlsl8RWBufferIiEixEj is the subscript operator for RWBuffer<int>
+
+// CHECK-NEXT: %[[Val0:.*]] = load i32, ptr %[[Val0_addr]], align 4
+// CHECK-NEXT: %[[B_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %B, i32 0, i32 0
+// CHECK-NEXT: %[[Idx:.*]] = load i32, ptr %[[Idx_addr]], align 4
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIiEixEj(ptr {{.*}} %[[B_0_Ptr]], i32 noundef %[[Idx]])
+// CHECK-NEXT: store i32 %[[Val0]], ptr %[[BufPtr]], align 4
+  B[0][Idx] = Val0;
+}
+
+// CHECK: define {{.*}} void @_Z4mainj(i32 noundef %GI)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %[[GI_addr:.*]] = alloca i32, align 4
+[numthreads(4,1,1)]
+void main(uint GI : SV_GroupIndex) {
+// CHECK-NEXT: %A = alloca [2 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: %[[Tmp:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: store i32 %GI, ptr %GI.addr, align 4
+
+// Initialization of array A with resources X and Y
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %A, ptr align 4 @_ZL1X, i32 4, i1 false)
+// CHECK-NEXT: %[[A_1_Ptr:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %A, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[A_1_Ptr]], ptr align 4 @_ZL1Y, i32 4, i1 false)
+  RWBuffer<int> A[2] = {X, Y};
+
+// Verify that SomeFn is called with a local copy of the array A
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp]], ptr align 4 %A, i32 8, i1 false)
+// CHECK-NEXT: %[[GI:.*]] = load i32, ptr %[[GI_addr]], align 4
+// CHECK-NEXT: call void @_Z6SomeFnA2_N4hlsl8RWBufferIiEEji(ptr noundef byval([2 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp]], i32 noundef %[[GI]], i32 noundef 1)
+  SomeFn(A, GI, 1);
+
+// CHECK-NEXT: %[[A_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %A, i32 0, i32 0
+// CHECK-NEXT: %[[GI:.*]] = load i32, ptr %[[GI_addr]], align 4
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIiEixEj(ptr {{.*}} %[[A_0_Ptr]], i32 noundef %[[GI]])
+// CHECK-NEXT: store i32 2, ptr %[[BufPtr]], align 4
+  A[0][GI] = 2;
+}

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Driver/gcc-toolchain-libstdcxx.cpp b/clang/test/Driver/gcc-toolchain-libstdcxx.cpp
deleted file mode 100644
index 6ec1674500022..0000000000000
--- a/clang/test/Driver/gcc-toolchain-libstdcxx.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// UNSUPPORTED: system-windows
-
-// This file tests that the GCC installation directory detection takes
-// the libstdc++ includes into account.  In each directory
-// Inputs/gcc_toolchain_libstdcxx/gccX, the installation directory for
-// gcc X should be picked in the future since it is the directory with
-// the largest version number which also contains the libstdc++
-// include directory.
-
-// RUN: %clang --gcc-toolchain=%S/Inputs/gcc_toolchain_libstdcxx/gcc10/usr -v 2>&1 |& FileCheck %s --check-prefix=GCC10
-// GCC10: clang: warning: future releases of the clang compiler will prefer GCC installations containing libstdc++ include directories; '[[PREFIX:.*gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu]]/10' would be chosen over '[[PREFIX]]/12' [-Wgcc-install-dir-libstdcxx]
-// GCC10: Found candidate GCC installation: [[PREFIX]]/10
-// GCC10: Found candidate GCC installation: [[PREFIX]]/11
-// GCC10: Found candidate GCC installation: [[PREFIX]]/12
-// GCC10: Selected GCC installation: [[PREFIX]]/12
-
-// RUN: %clang --gcc-toolchain=%S/Inputs/gcc_toolchain_libstdcxx/gcc11/usr -v 2>&1 |& FileCheck %s --check-prefix=ONLY_GCC11
-// ONLY_GCC11: clang: warning: future releases of the clang compiler will prefer GCC installations containing libstdc++ include directories; '[[PREFIX:.*gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu]]/11' would be chosen over '[[PREFIX]]/12' [-Wgcc-install-dir-libstdcxx]
-// ONLY_GCC11: Found candidate GCC installation: [[PREFIX]]/10
-// ONLY_GCC11: Found candidate GCC installation: [[PREFIX]]/11
-// ONLY_GCC11: Found candidate GCC installation: [[PREFIX]]/12
-// ONLY_GCC11: Selected GCC installation: [[PREFIX]]/12
-
-// RUN: %clang --gcc-toolchain=%S/Inputs/gcc_toolchain_libstdcxx/gcc12/usr -v 2>&1 |& FileCheck %s --check-prefix=GCC12
-// GCC12: Found candidate GCC installation: [[PREFIX:.*gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu]]/10
-// GCC12: Found candidate GCC installation: [[PREFIX]]/11
-// GCC12: Found candidate GCC installation: [[PREFIX]]/12
-// GCC12: Selected GCC installation: [[PREFIX]]/12

diff  --git a/lld/test/wasm/pie.s b/lld/test/wasm/pie.s
index 433958e149d8f..21eac79207318 100644
--- a/lld/test/wasm/pie.s
+++ b/lld/test/wasm/pie.s
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten %S/Inputs/internal_func.s -o %t.internal_func.o
 # RUN: wasm-ld --no-gc-sections --experimental-pic -pie --unresolved-symbols=import-dynamic -o %t.wasm %t.o %t.internal_func.o
 # RUN: obj2yaml %t.wasm | FileCheck %s
-# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_apply_global_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM
+# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM
 
 .functype external_func () -> ()
 .functype internal_func1 () -> (i32)
@@ -41,9 +41,6 @@ foo:
   drop
 
   global.get __stack_pointer
-  drop
-
-  global.get __wasm_first_page_end at GOT
   end_function
 
 get_data_address:
@@ -148,18 +145,6 @@ _start:
 # DISASSEM-LABEL:  <__wasm_apply_data_relocs>:
 # DISASSEM:        end
 
-# global 6 is __wasm_first_page_end, which is ABSOLUTE and
-# thus should not be relocated.
-#
-# DISASSEM-LABEL:  <__wasm_apply_global_relocs>:
-# DISASSEM:        global.set      4
-# DISASSEM:        global.set      5
-# DISASSEM-NOT:    global.set      6
-# DISASSEM:        global.set      7
-# DISASSEM:        global.set      8
-# DISASSEM:        global.set      9
-# DISASSEM:        end
-
 # Run the same test with extended-const support.  When this is available
 # we don't need __wasm_apply_global_relocs and instead rely on the add
 # instruction in the InitExpr.  We also, therefore, do not need these globals
@@ -188,23 +173,17 @@ _start:
 # EXTENDED-CONST-NEXT:        Type:            I32
 # EXTENDED-CONST-NEXT:        Mutable:         false
 # EXTENDED-CONST-NEXT:        InitExpr:
-# EXTENDED-CONST-NEXT:          Opcode:        I32_CONST
-# EXTENDED-CONST-NEXT:          Value:         65536
-# EXTENDED-CONST-NEXT:      - Index:           7
-# EXTENDED-CONST-NEXT:        Type:            I32
-# EXTENDED-CONST-NEXT:        Mutable:         false
-# EXTENDED-CONST-NEXT:        InitExpr:
 # EXTENDED-CONST-NEXT:          Extended:        true
 # This instruction sequence decodes to:
 # (global.get[0x23] 0x1 i32.const[0x41] 0x0C i32.add[0x6A] end[0x0b])
 # EXTENDED-CONST-NEXT:          Body:            2301410C6A0B
-# EXTENDED-CONST-NEXT:      - Index:           8
+# EXTENDED-CONST-NEXT:      - Index:           7
 # EXTENDED-CONST-NEXT:        Type:            I32
 # EXTENDED-CONST-NEXT:        Mutable:         false
 # EXTENDED-CONST-NEXT:        InitExpr:
 # EXTENDED-CONST-NEXT:          Opcode:        GLOBAL_GET
 # EXTENDED-CONST-NEXT:          Index:         2
-# EXTENDED-CONST-NEXT:      - Index:           9
+# EXTENDED-CONST-NEXT:      - Index:           8
 # EXTENDED-CONST-NEXT:        Type:            I32
 # EXTENDED-CONST-NEXT:        Mutable:         false
 # EXTENDED-CONST-NEXT:        InitExpr:

diff  --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index e26f818fc2ca0..e1192706ea913 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -440,8 +440,6 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const {
                                  : WASM_OPCODE_I32_ADD;
 
   for (const Symbol *sym : internalGotSymbols) {
-    if (sym->flags & WASM_SYMBOL_ABSOLUTE)
-      continue;
     if (TLS != sym->isTLS())
       continue;
 
@@ -505,8 +503,7 @@ void GlobalSection::writeBody() {
     bool useExtendedConst = false;
     uint32_t globalIdx;
     int64_t offset;
-    if (ctx.arg.extendedConst && ctx.isPic &&
-        (sym->flags & WASM_SYMBOL_ABSOLUTE) == 0) {
+    if (ctx.arg.extendedConst && ctx.isPic) {
       if (auto *d = dyn_cast<DefinedData>(sym)) {
         if (!sym->isTLS()) {
           globalIdx = ctx.sym.memoryBase->getGlobalIndex();

diff  --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 2445005bf98ce..520ce4deb9a57 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1027,9 +1027,16 @@ static inline bool isAddLike(const SDValue V) {
          (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
 }
 
+static SDValue stripAssertAlign(SDValue N) {
+  if (N.getOpcode() == ISD::AssertAlign)
+    N = N.getOperand(0);
+  return N;
+}
+
 // selectBaseADDR - Match a dag node which will serve as the base address for an
 // ADDR operand pair.
 static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
+  N = stripAssertAlign(N);
   if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
     return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
                                        GA->getValueType(0), GA->getOffset(),
@@ -1044,6 +1051,7 @@ static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
 }
 
 static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
+  Addr = stripAssertAlign(Addr);
   APInt AccumulatedOffset(64u, 0);
   while (isAddLike(Addr)) {
     const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
@@ -1055,7 +1063,7 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
       break;
 
     AccumulatedOffset += CI;
-    Addr = Addr->getOperand(0);
+    Addr = stripAssertAlign(Addr->getOperand(0));
   }
   return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
                                       MVT::i32);

diff  --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index f4362fe8d9056..e2bbe57c0085c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -412,6 +412,22 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
   }
 }
 
+// Create a call to the nvvm_internal_addrspace_wrap intrinsic and set the
+// alignment of the return value based on the alignment of the argument.
+static CallInst *createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB,
+                                                 Argument &Arg) {
+  CallInst *ArgInParam =
+      IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
+                          {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()},
+                          &Arg, {}, Arg.getName() + ".param");
+
+  if (MaybeAlign ParamAlign = Arg.getParamAlign())
+    ArgInParam->addRetAttr(
+        Attribute::getWithAlignment(ArgInParam->getContext(), *ParamAlign));
+
+  return ArgInParam;
+}
+
 namespace {
 struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
   using Base = PtrUseVisitor<ArgUseChecker>;
@@ -515,10 +531,7 @@ void copyByValParam(Function &F, Argument &Arg) {
       Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
   Arg.replaceAllUsesWith(AllocA);
 
-  Value *ArgInParam =
-      IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
-                          {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()},
-                          &Arg, {}, Arg.getName());
+  CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);
 
   // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
   // addrspacecast preserves alignment.  Since params are constant, this load
@@ -549,9 +562,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
     SmallVector<Use *, 16> UsesToUpdate(llvm::make_pointer_range(Arg->uses()));
 
     IRBuilder<> IRB(&*FirstInst);
-    Value *ArgInParamAS = IRB.CreateIntrinsic(
-        Intrinsic::nvvm_internal_addrspace_wrap,
-        {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()}, {Arg});
+    CallInst *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg);
 
     for (Use *U : UsesToUpdate)
       convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant);
@@ -581,10 +592,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
     // argument already in the param address space, we need to use the noop
     // intrinsic, this had the added benefit of preventing other optimizations
     // from folding away this pair of addrspacecasts.
-    auto *ParamSpaceArg =
-        IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
-                            {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()},
-                            Arg, {}, Arg->getName() + ".param");
+    auto *ParamSpaceArg = createNVVMInternalAddrspaceWrap(IRB, *Arg);
 
     // Cast param address to generic address space.
     Value *GenericArg = IRB.CreateAddrSpaceCast(

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3c19cea2bd1dd..4a1db80076530 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20759,12 +20759,22 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
         isNullConstant(Src.getOperand(1)) &&
         Src.getOperand(0).getValueType().isScalableVector()) {
       EVT VT = N->getValueType(0);
-      EVT SrcVT = Src.getOperand(0).getValueType();
-      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
+      SDValue EVSrc = Src.getOperand(0);
+      EVT EVSrcVT = EVSrc.getValueType();
+      assert(EVSrcVT.getVectorElementType() == VT.getVectorElementType());
       // Widths match, just return the original vector.
-      if (SrcVT == VT)
-        return Src.getOperand(0);
-      // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
+      if (EVSrcVT == VT)
+        return EVSrc;
+      SDLoc DL(N);
+      // Width is narrower, using insert_subvector.
+      if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
+        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                           EVSrc,
+                           DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+      }
+      // Width is wider, using extract_subvector.
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
+                         DAG.getConstant(0, DL, Subtarget.getXLenVT()));
     }
     [[fallthrough]];
   }

diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index b46f4829605a1..f6333d68a8ea5 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS
           llvm-objdump
           llvm-opt-fuzzer
           llvm-opt-report
+          llvm-offload-wrapper
           llvm-otool
           llvm-pdbutil
           llvm-profdata

diff  --git a/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
new file mode 100644
index 0000000000000..2051f6305cc03
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=nvptx-lower-args,infer-alignment -S | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+; ------------------------------------------------------------------------------
+; Test that alignment can be inferred through llvm.nvvm.internal.addrspace.wrap.p101.p0 intrinsics
+; thanks to the alignment attribute on the intrinsic
+; ------------------------------------------------------------------------------
+
+%struct.S1 = type { i32, i32, i32, i32 }
+define ptx_kernel i32 @test_align8(ptr noundef readonly byval(%struct.S1) align 8 captures(none) %params) {
+; CHECK-LABEL: define ptx_kernel i32 @test_align8(
+; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 8 captures(none) [[PARAMS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 8
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+entry:
+  %load = load i32, ptr %params, align 4
+  ret i32 %load
+}
+
+define ptx_kernel i32 @test_align1(ptr noundef readonly byval(%struct.S1) align 1 captures(none) %params) {
+; CHECK-LABEL: define ptx_kernel i32 @test_align1(
+; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 4 captures(none) [[PARAMS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call align 1 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+entry:
+  %load = load i32, ptr %params, align 4
+  ret i32 %load
+}

diff  --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 045704bdcd3fc..f5df0fcde1883 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -72,7 +72,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_int(
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INPUT11:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT:    [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; OPT-NEXT:    [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
 ; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT]], align 4
@@ -101,7 +101,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_struct(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INPUT1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT:    [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; OPT-NEXT:    [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
@@ -137,7 +137,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; OPT-NEXT:    [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
 ; OPT-NEXT:    [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
 ; OPT-NEXT:    ret void
@@ -180,9 +180,9 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
+; OPT-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
 ; OPT-NEXT:    [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
-; OPT-NEXT:    [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT:    [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; OPT-NEXT:    [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
 ; OPT-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; OPT-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -208,7 +208,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; OPT-NEXT:    [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
 ; OPT-NEXT:    store ptr [[INPUT1]], ptr [[ADDR]], align 8
 ; OPT-NEXT:    ret void
@@ -235,7 +235,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
 ; PTX-NOT      .local
 ; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; OPT-NEXT:    [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
 ; OPT-NEXT:    [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
@@ -357,7 +357,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_phi(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
 ; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -416,7 +416,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
 ; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; OPT-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
-; OPT-NEXT:    [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT:    [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
 ; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -471,7 +471,7 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
 ; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; OPT-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
-; OPT-NEXT:    [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT:    [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
 ; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -520,7 +520,7 @@ declare void @device_func(ptr byval(i32) align 4)
 define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
 ; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg(
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INPUT_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT:    [[INPUT_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; OPT-NEXT:    [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr
 ; OPT-NEXT:    call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]])
 ; OPT-NEXT:    ret void

diff  --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 7c029ab516d6e..b4a51035c6610 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -200,7 +200,7 @@ define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
 define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
 ; IRC-LABEL: define ptx_kernel void @ptr_as_int_aggr(
 ; IRC-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
-; IRC-NEXT:    [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; IRC-NEXT:    [[S3:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; IRC-NEXT:    [[I:%.*]] = load i64, ptr addrspace(101) [[S3]], align 8
 ; IRC-NEXT:    [[P:%.*]] = inttoptr i64 [[I]] to ptr
 ; IRC-NEXT:    [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
@@ -210,7 +210,7 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
 ;
 ; IRO-LABEL: define ptx_kernel void @ptr_as_int_aggr(
 ; IRO-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
-; IRO-NEXT:    [[S1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; IRO-NEXT:    [[S1:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; IRO-NEXT:    [[I:%.*]] = load i64, ptr addrspace(101) [[S1]], align 8
 ; IRO-NEXT:    [[P:%.*]] = inttoptr i64 [[I]] to ptr
 ; IRO-NEXT:    store i32 [[V]], ptr [[P]], align 4

diff  --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 20a35198c3c16..4d36ff9496ede 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -32,7 +32,7 @@ define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out
 ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only(
 ; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; LOWER-ARGS-NEXT:  [[ENTRY:.*:]]
-; LOWER-ARGS-NEXT:    [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; LOWER-ARGS-NEXT:    [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; LOWER-ARGS-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4
 ; LOWER-ARGS-NEXT:    store i32 [[I]], ptr [[OUT]], align 4
 ; LOWER-ARGS-NEXT:    ret void
@@ -66,7 +66,7 @@ define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly
 ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only_gep(
 ; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; LOWER-ARGS-NEXT:  [[ENTRY:.*:]]
-; LOWER-ARGS-NEXT:    [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; LOWER-ARGS-NEXT:    [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; LOWER-ARGS-NEXT:    [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
 ; LOWER-ARGS-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
 ; LOWER-ARGS-NEXT:    store i32 [[I]], ptr [[OUT]], align 4
@@ -128,7 +128,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
 ; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; COMMON-NEXT:  [[ENTRY:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR6:[0-9]+]]
 ; COMMON-NEXT:    ret void
@@ -167,7 +167,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
 ; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; COMMON-NEXT:  [[ENTRY:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
 ; COMMON-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR6]]
@@ -209,7 +209,7 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
 ; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; COMMON-NEXT:  [[ENTRY:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    store ptr [[S1]], ptr [[OUT]], align 8
 ; COMMON-NEXT:    ret void
@@ -246,7 +246,7 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
 ; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; COMMON-NEXT:  [[ENTRY:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
 ; COMMON-NEXT:    store ptr [[B]], ptr [[OUT]], align 8
@@ -286,7 +286,7 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
 ; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; COMMON-NEXT:  [[ENTRY:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    [[I:%.*]] = ptrtoint ptr [[S1]] to i64
 ; COMMON-NEXT:    store i64 [[I]], ptr [[OUT]], align 8
@@ -324,7 +324,7 @@ define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeo
 ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
 ; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; LOWER-ARGS-NEXT:  [[ENTRY:.*:]]
-; LOWER-ARGS-NEXT:    [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; LOWER-ARGS-NEXT:    [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; LOWER-ARGS-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true)
 ; LOWER-ARGS-NEXT:    ret void
 ;
@@ -445,7 +445,7 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
 ; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; COMMON-NEXT:  [[ENTRY:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true)
 ; COMMON-NEXT:    ret void
@@ -525,7 +525,7 @@ define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %
 ; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; COMMON-NEXT:  [[BB:.*:]]
 ; COMMON-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT:    [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
 ; COMMON-NEXT:    [[I:%.*]] = load i32, ptr [[IN]], align 4
 ; COMMON-NEXT:    store i32 [[I]], ptr [[S1]], align 4
@@ -551,7 +551,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
 ; SM_60-NEXT:    [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
 ; SM_60-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
-; SM_60-NEXT:    [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_60-NEXT:    [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
 ; SM_60-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
 ; SM_60-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
@@ -563,7 +563,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
 ; SM_70-NEXT:  [[BB:.*:]]
 ; SM_70-NEXT:    [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; SM_70-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
-; SM_70-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_70-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; SM_70-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
 ; SM_70-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
 ; SM_70-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
@@ -577,7 +577,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
 ; COPY-NEXT:    [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
 ; COPY-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
-; COPY-NEXT:    [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COPY-NEXT:    [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
 ; COPY-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
 ; COPY-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
@@ -637,7 +637,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
 ; COMMON-NEXT:    [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
 ; COMMON-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
-; COMMON-NEXT:    [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COMMON-NEXT:    [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
 ; COMMON-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
 ; COMMON-NEXT:    store i32 1, ptr [[PTRNEW]], align 4
@@ -682,7 +682,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
 ; SM_60-NEXT:    [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
 ; SM_60-NEXT:    [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
-; SM_60-NEXT:    [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_60-NEXT:    [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
 ; SM_60-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; SM_60:       [[FIRST]]:
@@ -702,7 +702,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
 ; SM_70-NEXT:  [[BB:.*:]]
 ; SM_70-NEXT:    [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; SM_70-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
-; SM_70-NEXT:    [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_70-NEXT:    [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; SM_70-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
 ; SM_70-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; SM_70:       [[FIRST]]:
@@ -724,7 +724,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
 ; COPY-NEXT:    [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT23]], ptr addrspace(101) align 8 [[INPUT24]], i64 8, i1 false)
 ; COPY-NEXT:    [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
-; COPY-NEXT:    [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COPY-NEXT:    [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
 ; COPY-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; COPY:       [[FIRST]]:
@@ -808,7 +808,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
 ; COMMON-NEXT:    [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
 ; COMMON-NEXT:    [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT:    [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COMMON-NEXT:    [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
 ; COMMON-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; COMMON:       [[FIRST]]:
@@ -871,7 +871,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
 ; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg(
 ; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
 ; COMMON-NEXT:    [[INPUT1:%.*]] = alloca i32, align 4
-; COMMON-NEXT:    [[INPUT2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; COMMON-NEXT:    [[INPUT2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
 ; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false)
 ; COMMON-NEXT:    call void @device_func(ptr byval(i32) align 4 [[INPUT1]])
 ; COMMON-NEXT:    ret void

diff  --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
new file mode 100644
index 0000000000000..da912bf401ec0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr='+v,+zvl512b' < %s | FileCheck %s
+
+define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x float> %arg2) {
+; CHECK-LABEL: redundant_vfmv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vfredusum.vs v9, v12, v8
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vfredusum.vs v8, v16, v8
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vrgather.vi v8, v9, 0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    ret
+  %s0 = extractelement <2 x float> %arg0, i64 0
+  %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)
+  %a0 = insertelement <2 x float> poison, float %r0, i64 0
+  %s1 = extractelement <2 x float> %arg0, i64 1
+  %r1 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s1, <64 x float> %arg2)
+  %a1 = insertelement <2 x float> %a0, float %r1, i64 1
+  ret <2 x float> %a1
+}

diff  --git a/llvm/test/Other/offload-wrapper.ll b/llvm/test/Other/offload-wrapper.ll
new file mode 100644
index 0000000000000..9107a141ad201
--- /dev/null
+++ b/llvm/test/Other/offload-wrapper.ll
@@ -0,0 +1,52 @@
+; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP
+
+;      HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
+; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
+; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin"
+; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+; HIP-NEXT: @.hip.binary_handle = internal global ptr null
+; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
+
+;      HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+; HIP-NEXT: entry:
+; HIP-NEXT:   %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+; HIP-NEXT:   store ptr %0, ptr @.hip.binary_handle, align 8
+; HIP-NEXT:   call void @.hip.globals_reg(ptr %0)
+; HIP-NEXT:   %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+; HIP-NEXT:   ret void
+; HIP-NEXT: }
+
+;      HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+; HIP-NEXT: entry:
+; HIP-NEXT:   %0 = load ptr, ptr @.hip.binary_handle, align 8
+; HIP-NEXT:   call void @__hipUnregisterFatBinary(ptr %0)
+; HIP-NEXT:   ret void
+; HIP-NEXT: }
+
+; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
+
+;      CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
+; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
+; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin"
+; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
+; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
+; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
+
+;      CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
+; CUDA-NEXT: entry:
+; CUDA-NEXT:   %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
+; CUDA-NEXT:   store ptr %0, ptr @.cuda.binary_handle, align 8
+; CUDA-NEXT:   call void @.cuda.globals_reg(ptr %0)
+; CUDA-NEXT:   call void @__cudaRegisterFatBinaryEnd(ptr %0)
+; CUDA-NEXT:   %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
+; CUDA-NEXT:   ret void
+; CUDA-NEXT: }
+
+;      CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
+; CUDA-NEXT: entry:
+; CUDA-NEXT:   %0 = load ptr, ptr @.cuda.binary_handle, align 8
+; CUDA-NEXT:   call void @__cudaUnregisterFatBinary(ptr %0)
+; CUDA-NEXT:   ret void
+; CUDA-NEXT: }

diff  --git a/llvm/tools/llvm-offload-wrapper/CMakeLists.txt b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt
new file mode 100644
index 0000000000000..18acbe2caaded
--- /dev/null
+++ b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS
+  BitWriter
+  Core
+  Object
+  Option
+  FrontendOffloading
+  Support
+  TargetParser
+  )
+
+add_llvm_tool(llvm-offload-wrapper
+  llvm-offload-wrapper.cpp
+
+  DEPENDS
+  intrinsics_gen
+  )

diff  --git a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
new file mode 100644
index 0000000000000..9dac1646b1e26
--- /dev/null
+++ b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
@@ -0,0 +1,135 @@
+//===- llvm-offload-wrapper: Create runtime registration code for devices -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides a utility for generating runtime registration code for device code.
+// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and
+// create a new IR module that calls the respective runtime to load it on the
+// device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Frontend/Offloading/OffloadWrapper.h"
+#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Host.h"
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+static cl::OptionCategory
+    OffloadWrapeprCategory("llvm-offload-wrapper options");
+
+static cl::opt<object::OffloadKind> Kind(
+    "kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory),
+    cl::Required,
+    cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"),
+               clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"),
+               clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries")));
+
+static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
+                                       cl::value_desc("file"),
+                                       cl::cat(OffloadWrapeprCategory));
+
+static cl::list<std::string> InputFiles(cl::Positional,
+                                        cl::desc("Wrap input from <file>"),
+                                        cl::value_desc("file"), cl::OneOrMore,
+                                        cl::cat(OffloadWrapeprCategory));
+
+static cl::opt<std::string>
+    TheTriple("triple", cl::desc("Target triple for the wrapper module"),
+              cl::init(sys::getDefaultTargetTriple()),
+              cl::cat(OffloadWrapeprCategory));
+
+static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) {
+  if (BuffersToWrap.size() > 1 &&
+      (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP))
+    return createStringError(
+        "CUDA / HIP offloading uses a single fatbinary or offload bundle");
+
+  LLVMContext Context;
+  Module M("offload.wrapper.module", Context);
+  M.setTargetTriple(Triple());
+
+  switch (Kind) {
+  case llvm::object::OFK_OpenMP:
+    if (Error Err = offloading::wrapOpenMPBinaries(
+            M, BuffersToWrap, offloading::getOffloadEntryArray(M),
+            /*Suffix=*/"", /*Relocatable=*/false))
+      return Err;
+    break;
+  case llvm::object::OFK_Cuda:
+    if (Error Err = offloading::wrapCudaBinary(
+            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M),
+            /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
+      return Err;
+    break;
+  case llvm::object::OFK_HIP:
+    if (Error Err = offloading::wrapHIPBinary(
+            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
+      return Err;
+    break;
+  default:
+    return createStringError(getOffloadKindName(Kind) +
+                             " wrapping is not supported");
+  }
+
+  int FD = -1;
+  if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
+    return errorCodeToError(EC);
+  llvm::raw_fd_ostream OS(FD, true);
+  WriteBitcodeToFile(M, OS);
+
+  return Error::success();
+}
+
+int main(int argc, char **argv) {
+  InitLLVM X(argc, argv);
+  cl::HideUnrelatedOptions(OffloadWrapeprCategory);
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "Generate runtime registration code for a device binary image\n");
+
+  if (Help) {
+    cl::PrintHelpMessage();
+    return EXIT_SUCCESS;
+  }
+
+  auto ReportError = [argv](Error E) {
+    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
+    exit(EXIT_FAILURE);
+  };
+
+  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+  SmallVector<ArrayRef<char>> BuffersToWrap;
+  for (StringRef Input : InputFiles) {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+        MemoryBuffer::getFileOrSTDIN(Input);
+    if (std::error_code EC = BufferOrErr.getError())
+      ReportError(createFileError(Input, EC));
+    std::unique_ptr<MemoryBuffer> &Buffer =
+        Buffers.emplace_back(std::move(*BufferOrErr));
+    BuffersToWrap.emplace_back(
+        ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
+  }
+
+  if (Error Err = wrapImages(BuffersToWrap))
+    ReportError(std::move(Err));
+
+  return EXIT_SUCCESS;
+}

diff  --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index c0babb85f5c46..67a4c43455809 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -689,7 +689,7 @@ isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
 
   // Set of regions of which the entry block domain has been propagated to BB.
   // all predecessors inside any of the regions can be skipped.
-  SmallSet<Region *, 8> PropagatedRegions;
+  SmallPtrSet<Region *, 8> PropagatedRegions;
 
   for (auto *PredBB : predecessors(BB)) {
     // Skip backedges.


        


More information about the llvm-branch-commits mailing list