[llvm-branch-commits] [llvm] eef17cb - Merge branch 'main' into revert-154018-fix/153745
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 19 09:49:09 PDT 2025
Author: Oleksandr T.
Date: 2025-08-19T19:48:53+03:00
New Revision: eef17cb4dc546d528d5a80d8f681bedd83766891
URL: https://github.com/llvm/llvm-project/commit/eef17cb4dc546d528d5a80d8f681bedd83766891
DIFF: https://github.com/llvm/llvm-project/commit/eef17cb4dc546d528d5a80d8f681bedd83766891.diff
LOG: Merge branch 'main' into revert-154018-fix/153745
Added:
clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl
clang/test/CodeGenHLSL/resources/res-array-local1.hlsl
clang/test/CodeGenHLSL/resources/res-array-local2.hlsl
clang/test/CodeGenHLSL/resources/res-array-local3.hlsl
llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
llvm/test/Other/offload-wrapper.ll
llvm/tools/llvm-offload-wrapper/CMakeLists.txt
llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
Modified:
clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
clang-tools-extra/clangd/AST.cpp
clang-tools-extra/clangd/XRefs.cpp
clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
clang/docs/ClangLinkerWrapper.rst
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/ToolChain.h
clang/lib/Driver/ToolChain.cpp
clang/lib/Driver/ToolChains/Gnu.cpp
clang/lib/Driver/ToolChains/Gnu.h
clang/lib/Driver/ToolChains/Hurd.cpp
clang/lib/Driver/ToolChains/Linux.cpp
clang/lib/Driver/ToolChains/Managarm.cpp
lld/test/wasm/pie.s
lld/wasm/SyntheticSections.cpp
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CMakeLists.txt
llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
llvm/test/CodeGen/NVPTX/lower-args.ll
llvm/test/CodeGen/NVPTX/lower-byval-args.ll
polly/lib/Analysis/ScopBuilder.cpp
Removed:
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/include/c++/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/include/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/include/c++/.keep
clang/test/Driver/gcc-toolchain-libstdcxx.cpp
################################################################################
diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
index 4b495e3877000..cda9c4e7a6e58 100644
--- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp
@@ -188,7 +188,7 @@ static bool isKnownToHaveValue(const Expr &Cond, const ASTContext &Ctx,
/// \return true iff all `CallExprs` visited have callees; false otherwise
/// indicating there is an unresolved indirect call.
static bool populateCallees(const Stmt *StmtNode,
- llvm::SmallSet<const Decl *, 16> &Callees) {
+ llvm::SmallPtrSet<const Decl *, 16> &Callees) {
if (const auto *Call = dyn_cast<CallExpr>(StmtNode)) {
const Decl *Callee = Call->getDirectCallee();
@@ -212,7 +212,7 @@ static bool populateCallees(const Stmt *StmtNode,
/// returns true iff `SCC` contains `Func` and its' function set overlaps with
/// `Callees`
static bool overlap(ArrayRef<CallGraphNode *> SCC,
- const llvm::SmallSet<const Decl *, 16> &Callees,
+ const llvm::SmallPtrSet<const Decl *, 16> &Callees,
const Decl *Func) {
bool ContainsFunc = false, Overlap = false;
@@ -264,7 +264,7 @@ static bool hasRecursionOverStaticLoopCondVariables(const Expr *Cond,
if (!hasStaticLocalVariable(Cond))
return false;
- llvm::SmallSet<const Decl *, 16> CalleesInLoop;
+ llvm::SmallPtrSet<const Decl *, 16> CalleesInLoop;
if (!populateCallees(LoopStmt, CalleesInLoop)) {
// If there are unresolved indirect calls, we assume there could
diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp
index 82aee4c84d074..2f46ecc92576c 100644
--- a/clang-tools-extra/clangd/AST.cpp
+++ b/clang-tools-extra/clangd/AST.cpp
@@ -985,7 +985,7 @@ resolveForwardingParameters(const FunctionDecl *D, unsigned MaxDepth) {
// Recurse on pack parameters
size_t Depth = 0;
const FunctionDecl *CurrentFunction = D;
- llvm::SmallSet<const FunctionTemplateDecl *, 4> SeenTemplates;
+ llvm::SmallPtrSet<const FunctionTemplateDecl *, 4> SeenTemplates;
if (const auto *Template = D->getPrimaryTemplate()) {
SeenTemplates.insert(Template);
}
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 11ee51072dccf..a253a630a48cc 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -1876,7 +1876,7 @@ static void fillSubTypes(const SymbolID &ID,
});
}
-using RecursionProtectionSet = llvm::SmallSet<const CXXRecordDecl *, 4>;
+using RecursionProtectionSet = llvm::SmallPtrSet<const CXXRecordDecl *, 4>;
// Extracts parents from AST and populates the type hierarchy item.
static void fillSuperTypes(const CXXRecordDecl &CXXRD, llvm::StringRef TUPath,
diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
index 134ae89288300..bc9a790232507 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
@@ -181,7 +181,7 @@ struct ExtractionZone {
bool requiresHoisting(const SourceManager &SM,
const HeuristicResolver *Resolver) const {
// First find all the declarations that happened inside extraction zone.
- llvm::SmallSet<const Decl *, 1> DeclsInExtZone;
+ llvm::SmallPtrSet<const Decl *, 1> DeclsInExtZone;
for (auto *RootStmt : RootStmts) {
findExplicitReferences(
RootStmt,
diff --git a/clang/docs/ClangLinkerWrapper.rst b/clang/docs/ClangLinkerWrapper.rst
index e69cdba434c93..eb38d2b8fb5ee 100644
--- a/clang/docs/ClangLinkerWrapper.rst
+++ b/clang/docs/ClangLinkerWrapper.rst
@@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
--v Display the version number and exit
-- The separator for the wrapped linker arguments
+The linker wrapper will generate the appropriate runtime calls to register the
+generated device binary with the offloading runtime. To do this step manually we
+provide the ``llvm-offload-wrapper`` utility.
+
Relocatable Linking
===================
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 8ee1586e72e01..2a4c5ab4cf0cb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -37,22 +37,6 @@ latest release, please see the `Clang Web Site <https://clang.llvm.org>`_ or the
Potentially Breaking Changes
============================
-- Clang will now emit a warning if the auto-detected GCC installation
- directory (i.e. the one with the largest version number) does not
- contain libstdc++ include directories although a "complete" GCC
- installation directory containing the include directories is
- available. It is planned to change the auto-detection to prefer the
- "complete" directory in the future. The warning will disappear if
- the libstdc++ include directories are either installed or removed
- for all GCC installation directories considered by the
- auto-detection; see the output of ``clang -v`` for a list of those
- directories. If the GCC installations cannot be modified and
- maintaining the current choice of the auto-detection is desired, the
- GCC installation directory can be selected explicitly using the
- ``--gcc-install-dir`` command line argument. This will silence the
- warning. It can also be disabled using the
- ``-Wno-gcc-install-dir-libstdcxx`` command line flag.
-
C/C++ Language Potentially Breaking Changes
-------------------------------------------
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index b8c7c6e8d6909..6df8f9932f30f 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -885,9 +885,4 @@ def warn_drv_openacc_without_cir
: Warning<"OpenACC directives will result in no runtime behavior; use "
"-fclangir to enable runtime effect">,
InGroup<SourceUsesOpenACC>;
-
-def warn_drv_gcc_install_dir_libstdcxx : Warning<
- "future releases of the clang compiler will prefer GCC installations "
- "containing libstdc++ include directories; '%0' would be chosen over '%1'">,
- InGroup<DiagGroup<"gcc-install-dir-libstdcxx">>;
}
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 1425714d34110..243056360370f 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -224,6 +224,9 @@ class ToolChain {
static void addSystemFrameworkInclude(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const Twine &Path);
+ static void addSystemInclude(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ const Twine &Path);
static void addExternCSystemInclude(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const Twine &Path);
@@ -243,9 +246,6 @@ class ToolChain {
///@}
public:
- static void addSystemInclude(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args,
- const Twine &Path);
virtual ~ToolChain();
// Accessors
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 65b36217a940f..7667dbddb0ca2 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1409,6 +1409,13 @@ void ToolChain::addSystemFrameworkInclude(const llvm::opt::ArgList &DriverArgs,
CC1Args.push_back(DriverArgs.MakeArgString(Path));
}
+/// Utility function to add a system include directory to CC1 arguments.
+void ToolChain::addSystemInclude(const ArgList &DriverArgs,
+ ArgStringList &CC1Args, const Twine &Path) {
+ CC1Args.push_back("-internal-isystem");
+ CC1Args.push_back(DriverArgs.MakeArgString(Path));
+}
+
/// Utility function to add a system include directory with extern "C"
/// semantics to CC1 arguments.
///
@@ -1431,14 +1438,6 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs,
addExternCSystemInclude(DriverArgs, CC1Args, Path);
}
-/// Utility function to add a system include directory to CC1 arguments.
-/*static*/ void ToolChain::addSystemInclude(const ArgList &DriverArgs,
- ArgStringList &CC1Args,
- const Twine &Path) {
- CC1Args.push_back("-internal-isystem");
- CC1Args.push_back(DriverArgs.MakeArgString(Path));
-}
-
/// Utility function to add a list of system framework directories to CC1.
void ToolChain::addSystemFrameworkIncludes(const ArgList &DriverArgs,
ArgStringList &CC1Args,
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 3dade2bdf2277..01b146db24f3e 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2123,11 +2123,10 @@ void Generic_GCC::GCCInstallationDetector::init(
StringRef TripleText =
llvm::sys::path::filename(llvm::sys::path::parent_path(InstallDir));
- SelectedInstallation.Version = GCCVersion::Parse(VersionText);
- SelectedInstallation.GCCTriple.setTriple(TripleText);
- SelectedInstallation.GCCInstallPath = std::string(InstallDir);
- SelectedInstallation.GCCParentLibPath =
- SelectedInstallation.GCCInstallPath + "/../../..";
+ Version = GCCVersion::Parse(VersionText);
+ GCCTriple.setTriple(TripleText);
+ GCCInstallPath = std::string(InstallDir);
+ GCCParentLibPath = GCCInstallPath + "/../../..";
IsValid = true;
}
return;
@@ -2187,7 +2186,7 @@ void Generic_GCC::GCCInstallationDetector::init(
// Loop over the various components which exist and select the best GCC
// installation available. GCC installs are ranked by version number.
const GCCVersion VersionZero = GCCVersion::Parse("0.0.0");
- SelectedInstallation.Version = VersionZero;
+ Version = VersionZero;
for (const std::string &Prefix : Prefixes) {
auto &VFS = D.getVFS();
if (!VFS.exists(Prefix))
@@ -2215,7 +2214,7 @@ void Generic_GCC::GCCInstallationDetector::init(
}
// Skip other prefixes once a GCC installation is found.
- if (SelectedInstallation.Version > VersionZero)
+ if (Version > VersionZero)
break;
}
}
@@ -2224,17 +2223,14 @@ void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const {
for (const auto &InstallPath : CandidateGCCInstallPaths)
OS << "Found candidate GCC installation: " << InstallPath << "\n";
- if (!SelectedInstallation.GCCInstallPath.empty())
- OS << "Selected GCC installation: " << SelectedInstallation.GCCInstallPath
- << "\n";
+ if (!GCCInstallPath.empty())
+ OS << "Selected GCC installation: " << GCCInstallPath << "\n";
for (const auto &Multilib : Multilibs)
OS << "Candidate multilib: " << Multilib << "\n";
- if (Multilibs.size() != 0 ||
- !SelectedInstallation.SelectedMultilib.isDefault())
- OS << "Selected multilib: " << SelectedInstallation.SelectedMultilib
- << "\n";
+ if (Multilibs.size() != 0 || !SelectedMultilib.isDefault())
+ OS << "Selected multilib: " << SelectedMultilib << "\n";
}
bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
@@ -2772,50 +2768,14 @@ bool Generic_GCC::GCCInstallationDetector::ScanGCCForMultilibs(
}
Multilibs = Detected.Multilibs;
- SelectedInstallation.SelectedMultilib =
- Detected.SelectedMultilibs.empty() ? Multilib()
- : Detected.SelectedMultilibs.back();
+ SelectedMultilib = Detected.SelectedMultilibs.empty()
+ ? Multilib()
+ : Detected.SelectedMultilibs.back();
BiarchSibling = Detected.BiarchSibling;
return true;
}
-bool Generic_GCC::GCCInstallationDetector::SelectGCCInstallationDirectory(
- const SmallVector<Generic_GCC::GCCInstallCandidate, 3> &Installations,
- const ArgList &Args,
- Generic_GCC::GCCInstallCandidate &SelectedInstallation) const {
- if (Installations.empty())
- return false;
-
- SelectedInstallation =
- *max_element(Installations, [](const auto &Max, const auto &I) {
- return I.Version > Max.Version;
- });
-
- // FIXME Start selecting installation with libstdc++ in clang 22,
- // using the current way of selecting the installation as a fallback
- // only. For now, warn if the installation with libstdc++
diff ers
- // from SelectedInstallation.
- const GCCInstallCandidate *InstallWithIncludes = nullptr;
- for (const auto &I : Installations) {
- if ((!InstallWithIncludes || I.Version > InstallWithIncludes->Version) &&
- GCCInstallationHasLibStdcxxIncludePaths(I, Args))
- InstallWithIncludes = &I;
- }
-
- if (InstallWithIncludes && SelectedInstallation.GCCInstallPath !=
- InstallWithIncludes->GCCInstallPath)
- D.Diag(diag::warn_drv_gcc_install_dir_libstdcxx)
- << InstallWithIncludes->GCCInstallPath
- << SelectedInstallation.GCCInstallPath;
-
- // TODO Warn if SelectedInstallation does not contain libstdc++ includes
- // although compiler flags indicate that it is required (C++ compilation,
- // libstdc++ not explicitly disabled).
-
- return true;
-}
-
void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
const llvm::Triple &TargetTriple, const ArgList &Args,
const std::string &LibDir, StringRef CandidateTriple,
@@ -2845,7 +2805,6 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
TargetTriple.getVendor() == llvm::Triple::Freescale ||
TargetTriple.getVendor() == llvm::Triple::OpenEmbedded}};
- SmallVector<GCCInstallCandidate, 3> Installations;
for (auto &Suffix : Suffixes) {
if (!Suffix.Active)
continue;
@@ -2863,31 +2822,23 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
continue; // Saw this path before; no need to look at it again.
if (CandidateVersion.isOlderThan(4, 1, 1))
continue;
- if (CandidateVersion <= SelectedInstallation.Version && IsValid)
+ if (CandidateVersion <= Version)
continue;
if (!ScanGCCForMultilibs(TargetTriple, Args, LI->path(),
NeedsBiarchSuffix))
continue;
- GCCInstallCandidate Installation;
- Installation.Version = CandidateVersion;
- Installation.GCCTriple.setTriple(CandidateTriple);
+ Version = CandidateVersion;
+ GCCTriple.setTriple(CandidateTriple);
// FIXME: We hack together the directory name here instead of
// using LI to ensure stable path separators across Windows and
// Linux.
- Installation.GCCInstallPath =
- (LibDir + "/" + LibSuffix + "/" + VersionText).str();
- Installation.GCCParentLibPath =
- (Installation.GCCInstallPath + "/../" + Suffix.ReversePath).str();
- Installation.SelectedMultilib = getMultilib();
-
- Installations.push_back(Installation);
+ GCCInstallPath = (LibDir + "/" + LibSuffix + "/" + VersionText).str();
+ GCCParentLibPath = (GCCInstallPath + "/../" + Suffix.ReversePath).str();
+ IsValid = true;
}
}
-
- IsValid |=
- SelectGCCInstallationDirectory(Installations, Args, SelectedInstallation);
}
bool Generic_GCC::GCCInstallationDetector::ScanGentooConfigs(
@@ -2965,12 +2916,10 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig(
NeedsBiarchSuffix))
continue;
- SelectedInstallation.Version =
- GCCVersion::Parse(ActiveVersion.second);
- SelectedInstallation.GCCInstallPath = GentooPath;
- SelectedInstallation.GCCParentLibPath =
- GentooPath + std::string("/../../..");
- SelectedInstallation.GCCTriple.setTriple(ActiveVersion.first);
+ Version = GCCVersion::Parse(ActiveVersion.second);
+ GCCInstallPath = GentooPath;
+ GCCParentLibPath = GentooPath + std::string("/../../..");
+ GCCTriple.setTriple(ActiveVersion.first);
IsValid = true;
return true;
}
@@ -3173,9 +3122,8 @@ void Generic_GCC::AddMultilibIncludeArgs(const ArgList &DriverArgs,
// gcc TOOL_INCLUDE_DIR.
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
std::string LibPath(GCCInstallation.getParentLibPath());
- ToolChain::addSystemInclude(DriverArgs, CC1Args,
- Twine(LibPath) + "/../" + GCCTriple.str() +
- "/include");
+ addSystemInclude(DriverArgs, CC1Args,
+ Twine(LibPath) + "/../" + GCCTriple.str() + "/include");
const auto &Callback = Multilibs.includeDirsCallback();
if (Callback) {
@@ -3262,14 +3210,12 @@ Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
return;
}
-static bool addLibStdCXXIncludePaths(llvm::vfs::FileSystem &vfs,
- Twine IncludeDir, StringRef Triple,
- Twine IncludeSuffix,
- const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args,
- bool DetectDebian = false) {
-
- if (!vfs.exists(IncludeDir))
+bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
+ Twine IncludeSuffix,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ bool DetectDebian) const {
+ if (!getVFS().exists(IncludeDir))
return false;
// Debian native gcc uses g++-multiarch-incdir.
diff which uses
@@ -3281,48 +3227,39 @@ static bool addLibStdCXXIncludePaths(llvm::vfs::FileSystem &vfs,
std::string Path =
(Include + "/" + Triple + Dir.substr(Include.size()) + IncludeSuffix)
.str();
- if (DetectDebian && !vfs.exists(Path))
+ if (DetectDebian && !getVFS().exists(Path))
return false;
// GPLUSPLUS_INCLUDE_DIR
- ToolChain::addSystemInclude(DriverArgs, CC1Args, IncludeDir);
+ addSystemInclude(DriverArgs, CC1Args, IncludeDir);
// GPLUSPLUS_TOOL_INCLUDE_DIR. If Triple is not empty, add a target-dependent
// include directory.
if (DetectDebian)
- ToolChain::addSystemInclude(DriverArgs, CC1Args, Path);
+ addSystemInclude(DriverArgs, CC1Args, Path);
else if (!Triple.empty())
- ToolChain::addSystemInclude(DriverArgs, CC1Args,
- IncludeDir + "/" + Triple + IncludeSuffix);
+ addSystemInclude(DriverArgs, CC1Args,
+ IncludeDir + "/" + Triple + IncludeSuffix);
// GPLUSPLUS_BACKWARD_INCLUDE_DIR
- ToolChain::addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
+ addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
return true;
}
-bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
- Twine IncludeSuffix,
- const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args,
- bool DetectDebian) const {
- return ::addLibStdCXXIncludePaths(getVFS(), IncludeDir, Triple, IncludeSuffix,
- DriverArgs, CC1Args, DetectDebian);
-}
-
-bool Generic_GCC::GCCInstallCandidate::addGCCLibStdCxxIncludePaths(
- llvm::vfs::FileSystem &vfs, const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args, StringRef DebianMultiarch) const {
+bool Generic_GCC::addGCCLibStdCxxIncludePaths(
+ const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+ StringRef DebianMultiarch) const {
+ assert(GCCInstallation.isValid());
// By default, look for the C++ headers in an include directory adjacent to
// the lib directory of the GCC installation. Note that this is expect to be
// equivalent to '/usr/include/c++/X.Y' in almost all cases.
- StringRef LibDir = getParentLibPath();
- StringRef InstallDir = getInstallPath();
- StringRef TripleStr = getTriple().str();
- const Multilib &Multilib = getMultilib();
- const GCCVersion &Version = getVersion();
+ StringRef LibDir = GCCInstallation.getParentLibPath();
+ StringRef InstallDir = GCCInstallation.getInstallPath();
+ StringRef TripleStr = GCCInstallation.getTriple().str();
+ const Multilib &Multilib = GCCInstallation.getMultilib();
+ const GCCVersion &Version = GCCInstallation.getVersion();
// Try /../$triple/include/c++/$version (gcc --print-multiarch is not empty).
- if (::addLibStdCXXIncludePaths(
- vfs,
+ if (addLibStdCXXIncludePaths(
LibDir.str() + "/../" + TripleStr + "/include/c++/" + Version.Text,
TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args))
return true;
@@ -3330,24 +3267,22 @@ bool Generic_GCC::GCCInstallCandidate::addGCCLibStdCxxIncludePaths(
// Try /gcc/$triple/$version/include/c++/ (gcc --print-multiarch is not
// empty). Like above but for GCC built with
// --enable-version-specific-runtime-libs.
- if (::addLibStdCXXIncludePaths(vfs,
- LibDir.str() + "/gcc/" + TripleStr + "/" +
- Version.Text + "/include/c++/",
- TripleStr, Multilib.includeSuffix(),
- DriverArgs, CC1Args))
+ if (addLibStdCXXIncludePaths(LibDir.str() + "/gcc/" + TripleStr + "/" +
+ Version.Text + "/include/c++/",
+ TripleStr, Multilib.includeSuffix(), DriverArgs,
+ CC1Args))
return true;
// Detect Debian g++-multiarch-incdir.
diff .
- if (::addLibStdCXXIncludePaths(
- vfs, LibDir.str() + "/../include/c++/" + Version.Text,
- DebianMultiarch, Multilib.includeSuffix(), DriverArgs, CC1Args,
- /*Debian=*/true))
+ if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
+ DebianMultiarch, Multilib.includeSuffix(),
+ DriverArgs, CC1Args, /*Debian=*/true))
return true;
// Try /../include/c++/$version (gcc --print-multiarch is empty).
- if (::addLibStdCXXIncludePaths(
- vfs, LibDir.str() + "/../include/c++/" + Version.Text, TripleStr,
- Multilib.includeSuffix(), DriverArgs, CC1Args))
+ if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
+ TripleStr, Multilib.includeSuffix(), DriverArgs,
+ CC1Args))
return true;
// Otherwise, fall back on a bunch of options which don't use multiarch
@@ -3362,50 +3297,20 @@ bool Generic_GCC::GCCInstallCandidate::addGCCLibStdCxxIncludePaths(
};
for (const auto &IncludePath : LibStdCXXIncludePathCandidates) {
- if (::addLibStdCXXIncludePaths(vfs, IncludePath, TripleStr,
- Multilib.includeSuffix(), DriverArgs,
- CC1Args))
+ if (addLibStdCXXIncludePaths(IncludePath, TripleStr,
+ Multilib.includeSuffix(), DriverArgs, CC1Args))
return true;
}
return false;
}
-bool Generic_GCC::GCCInstallationDetector::
- GCCInstallationHasLibStdcxxIncludePaths(
- const GCCInstallCandidate &GCCInstallation,
- const llvm::opt::ArgList &DriverArgs) const {
- StringRef DebianMultiarch =
- TripleToDebianMultiarch(GCCInstallation.getTriple());
-
- // The following function checks for libstdc++ include paths and
- // adds them to the provided argument list. Here we just need the
- // check.
- llvm::opt::ArgStringList dummyCC1Args;
- return GCCInstallation.addGCCLibStdCxxIncludePaths(
- D.getVFS(), DriverArgs, dummyCC1Args, DebianMultiarch);
-}
-
-bool Generic_GCC::addGCCLibStdCxxIncludePaths(
- const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const {
- assert(GCCInstallation.isValid());
-
- // Detect Debian g++-multiarch-incdir.
diff .
- StringRef DebianMultiarch =
- GCCInstallation.TripleToDebianMultiarch(GCCInstallation.getTriple());
-
- return GCCInstallation.getSelectedInstallation().addGCCLibStdCxxIncludePaths(
- getVFS(), DriverArgs, CC1Args, DebianMultiarch);
-}
-
void
Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
- if (!GCCInstallation.isValid())
- return;
-
- GCCInstallation.getSelectedInstallation().addGCCLibStdCxxIncludePaths(
- getVFS(), DriverArgs, CC1Args, GCCInstallation.getTriple().str());
+ if (GCCInstallation.isValid()) {
+ addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args,
+ GCCInstallation.getTriple().str());
+ }
}
llvm::opt::DerivedArgList *
diff --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h
index 5fe143b4aa035..ad817452e5933 100644
--- a/clang/lib/Driver/ToolChains/Gnu.h
+++ b/clang/lib/Driver/ToolChains/Gnu.h
@@ -184,39 +184,6 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
bool operator>=(const GCCVersion &RHS) const { return !(*this < RHS); }
};
- struct GCCInstallCandidate {
- // FIXME: These might be better as path objects.
- std::string GCCInstallPath;
- std::string GCCParentLibPath;
-
- llvm::Triple GCCTriple;
-
- /// The primary multilib appropriate for the given flags.
- Multilib SelectedMultilib;
-
- GCCVersion Version;
-
- /// Get the GCC triple for the detected install.
- const llvm::Triple &getTriple() const { return GCCTriple; }
-
- /// Get the detected GCC installation path.
- StringRef getInstallPath() const { return GCCInstallPath; }
-
- /// Get the detected GCC parent lib path.
- StringRef getParentLibPath() const { return GCCParentLibPath; }
-
- /// Get the detected Multilib
- const Multilib &getMultilib() const { return SelectedMultilib; }
-
- /// Get the detected GCC version string.
- const GCCVersion &getVersion() const { return Version; }
-
- bool addGCCLibStdCxxIncludePaths(llvm::vfs::FileSystem &vfs,
- const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args,
- StringRef DebianMultiarch) const;
- };
-
/// This is a class to find a viable GCC installation for Clang to
/// use.
///
@@ -225,15 +192,21 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
/// Driver, and has logic for fuzzing that where appropriate.
class GCCInstallationDetector {
bool IsValid;
-
+ llvm::Triple GCCTriple;
const Driver &D;
- GCCInstallCandidate SelectedInstallation;
+ // FIXME: These might be better as path objects.
+ std::string GCCInstallPath;
+ std::string GCCParentLibPath;
+ /// The primary multilib appropriate for the given flags.
+ Multilib SelectedMultilib;
/// On Biarch systems, this corresponds to the default multilib when
/// targeting the non-default multilib. Otherwise, it is empty.
std::optional<Multilib> BiarchSibling;
+ GCCVersion Version;
+
// We retain the list of install paths that were considered and rejected in
// order to print out detailed information in verbose mode.
std::set<std::string> CandidateGCCInstallPaths;
@@ -245,50 +218,23 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
const std::string GentooConfigDir = "/etc/env.d/gcc";
public:
- /// Function for converting a triple to a Debian multiarch. The
- /// toolchains use this to adjust the target specific component of
- /// include paths for Debian.
- std::function<StringRef(const llvm::Triple &)> TripleToDebianMultiarch =
- [](const llvm::Triple &T) {
- StringRef S = T.str();
- return S;
- };
-
explicit GCCInstallationDetector(const Driver &D) : IsValid(false), D(D) {}
-
void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args);
- // TODO Replace isValid by changing SelectedInstallation into
- // std::optional<SelectedInstallation>
- // and move all accessors for fields of GCCInstallCandidate into
- // that struct.
-
/// Check whether we detected a valid GCC install.
bool isValid() const { return IsValid; }
- const GCCInstallCandidate &getSelectedInstallation() const {
- return SelectedInstallation;
- }
-
/// Get the GCC triple for the detected install.
- const llvm::Triple &getTriple() const {
- return SelectedInstallation.GCCTriple;
- }
+ const llvm::Triple &getTriple() const { return GCCTriple; }
/// Get the detected GCC installation path.
- StringRef getInstallPath() const {
- return SelectedInstallation.GCCInstallPath;
- }
+ StringRef getInstallPath() const { return GCCInstallPath; }
/// Get the detected GCC parent lib path.
- StringRef getParentLibPath() const {
- return SelectedInstallation.GCCParentLibPath;
- }
+ StringRef getParentLibPath() const { return GCCParentLibPath; }
/// Get the detected Multilib
- const Multilib &getMultilib() const {
- return SelectedInstallation.SelectedMultilib;
- }
+ const Multilib &getMultilib() const { return SelectedMultilib; }
/// Get the whole MultilibSet
const MultilibSet &getMultilibs() const { return Multilibs; }
@@ -298,9 +244,7 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
bool getBiarchSibling(Multilib &M) const;
/// Get the detected GCC version string.
- const GCCVersion &getVersion() const {
- return SelectedInstallation.Version;
- }
+ const GCCVersion &getVersion() const { return Version; }
/// Print information about the detected GCC installation.
void print(raw_ostream &OS) const;
@@ -318,19 +262,6 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
SmallVectorImpl<std::string> &Prefixes,
StringRef SysRoot);
- /// Checks if the \p GCCInstallation has libstdc++ include
- /// directories.
- bool GCCInstallationHasLibStdcxxIncludePaths(
- const GCCInstallCandidate &GCCInstallation,
- const llvm::opt::ArgList &DriverArgs) const;
-
- /// Select a GCC installation directory from \p Installations and
- /// set \p SelectedInstallation accordingly.
- bool SelectGCCInstallationDirectory(
- const SmallVector<GCCInstallCandidate, 3> &Installations,
- const llvm::opt::ArgList &Args,
- GCCInstallCandidate &SelectedInstallation) const;
-
bool ScanGCCForMultilibs(const llvm::Triple &TargetTriple,
const llvm::opt::ArgList &Args, StringRef Path,
bool NeedsBiarchSuffix = false);
@@ -417,7 +348,8 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain {
llvm::opt::ArgStringList &CC1Args) const;
bool addGCCLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC) const;
+ llvm::opt::ArgStringList &CC1Args,
+ StringRef DebianMultiarch) const;
bool addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
Twine IncludeSuffix,
diff --git a/clang/lib/Driver/ToolChains/Hurd.cpp b/clang/lib/Driver/ToolChains/Hurd.cpp
index 8bcc7e6d9759e..a22a8face1797 100644
--- a/clang/lib/Driver/ToolChains/Hurd.cpp
+++ b/clang/lib/Driver/ToolChains/Hurd.cpp
@@ -71,13 +71,6 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
- GCCInstallation.TripleToDebianMultiarch = [](const llvm::Triple &T) {
- StringRef TripleStr = T.str();
- StringRef DebianMultiarch =
- T.getArch() == llvm::Triple::x86 ? "i386-gnu" : TripleStr;
- return DebianMultiarch;
- };
-
GCCInstallation.init(Triple, Args);
Multilibs = GCCInstallation.getMultilibs();
SelectedMultilibs.assign({GCCInstallation.getMultilib()});
@@ -214,7 +207,12 @@ void Hurd::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
if (!GCCInstallation.isValid())
return;
- addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args);
+ StringRef TripleStr = GCCInstallation.getTriple().str();
+ StringRef DebianMultiarch =
+ GCCInstallation.getTriple().getArch() == llvm::Triple::x86 ? "i386-gnu"
+ : TripleStr;
+
+ addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args, DebianMultiarch);
}
void Hurd::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const {
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 16e35b08cfbd6..8ac8d4eb91812 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -211,13 +211,6 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
- GCCInstallation.TripleToDebianMultiarch = [](const llvm::Triple &T) {
- StringRef TripleStr = T.str();
- StringRef DebianMultiarch =
- T.getArch() == llvm::Triple::x86 ? "i386-linux-gnu" : TripleStr;
- return DebianMultiarch;
- };
-
GCCInstallation.init(Triple, Args);
Multilibs = GCCInstallation.getMultilibs();
SelectedMultilibs.assign({GCCInstallation.getMultilib()});
@@ -700,15 +693,22 @@ void Linux::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
if (!GCCInstallation.isValid())
return;
+ // Detect Debian g++-multiarch-incdir.
diff .
+ StringRef TripleStr = GCCInstallation.getTriple().str();
+ StringRef DebianMultiarch =
+ GCCInstallation.getTriple().getArch() == llvm::Triple::x86
+ ? "i386-linux-gnu"
+ : TripleStr;
+
// Try generic GCC detection first.
- if (Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args))
+ if (Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args,
+ DebianMultiarch))
return;
StringRef LibDir = GCCInstallation.getParentLibPath();
const Multilib &Multilib = GCCInstallation.getMultilib();
const GCCVersion &Version = GCCInstallation.getVersion();
- StringRef TripleStr = GCCInstallation.getTriple().str();
const std::string LibStdCXXIncludePathCandidates[] = {
// Android standalone toolchain has C++ headers in yet another place.
LibDir.str() + "/../" + TripleStr.str() + "/include/c++/" + Version.Text,
diff --git a/clang/lib/Driver/ToolChains/Managarm.cpp b/clang/lib/Driver/ToolChains/Managarm.cpp
index da4a9072317f4..0f56f0f6aad77 100644
--- a/clang/lib/Driver/ToolChains/Managarm.cpp
+++ b/clang/lib/Driver/ToolChains/Managarm.cpp
@@ -193,8 +193,10 @@ void Managarm::addLibStdCxxIncludePaths(
if (!GCCInstallation.isValid())
return;
+ StringRef TripleStr = GCCInstallation.getTriple().str();
+
// Try generic GCC detection.
- addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args);
+ Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args, TripleStr);
}
SanitizerMask Managarm::getSupportedSanitizers() const {
diff --git a/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl
new file mode 100644
index 0000000000000..d803882fd2f72
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local-multi-dim.hlsl
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies handling of multi-dimensional local arrays of resources
+// when used as a function argument and local variable.
+
+// CHECK: @_ZL1A = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1B = internal global %"class.hlsl::RWBuffer" poison, align 4
+
+RWBuffer<float> A : register(u10);
+RWBuffer<float> B : register(u20);
+RWStructuredBuffer<float> Out;
+
+// NOTE: _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> and
+// _ZN4hlsl18RWStructuredBufferIfEixEj is the subscript operator for RWStructuredBuffer<float>
+
+// CHECK: define {{.*}} float @_Z3fooA2_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([2 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %Arr)
+// CHECK-NEXT: entry:
+float foo(RWBuffer<float> Arr[2][2]) {
+// CHECK-NEXT: %[[Arr_1_Ptr:.*]] = getelementptr inbounds [2 x [2 x %"class.hlsl::RWBuffer"]], ptr %Arr, i32 0, i32 1
+// CHECK-NEXT: %[[Arr_1_1_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %[[Arr_1_Ptr]], i32 0, i32 1
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Arr_1_1_Ptr]], i32 noundef 0)
+// CHECK-NEXT: %[[Value:.*]] = load float, ptr %[[BufPtr]], align 4
+// CHECK-NEXT: ret float %[[Value]]
+ return Arr[1][1][0];
+}
+
+// CHECK: define internal void @_Z4mainv()
+// CHECK-NEXT: entry:
+[numthreads(4,1,1)]
+void main() {
+// CHECK-NEXT: %L = alloca [2 x [2 x %"class.hlsl::RWBuffer"]], align 4
+// CHECK-NEXT: %[[Tmp:.*]] = alloca [2 x [2 x %"class.hlsl::RWBuffer"]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %L, ptr align 4 @_ZL1A, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr1:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %L, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr1]], ptr align 4 @_ZL1B, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr2:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %L, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr2]], ptr align 4 @_ZL1A, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr3:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %[[Ptr2]], i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr3]], ptr align 4 @_ZL1B, i32 4, i1 false)
+ RWBuffer<float> L[2][2] = { { A, B }, { A, B } };
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp]], ptr align 4 %L, i32 16, i1 false)
+// CHECK-NEXT: %[[ReturnedValue:.*]] = call {{.*}}float @_Z3fooA2_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([2 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %[[Tmp]])
+// CHECK-NEXT: %[[OutBufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr {{.*}} @_ZL3Out, i32 noundef 0)
+// CHECK-NEXT: store float %[[ReturnedValue]], ptr %[[OutBufPtr]], align 4
+// CHECK-NEXT: ret void
+ Out[0] = foo(L);
+}
diff --git a/clang/test/CodeGenHLSL/resources/res-array-local1.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local1.hlsl
new file mode 100644
index 0000000000000..c0d508b1395c3
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local1.hlsl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies local arrays of resources in HLSL.
+
+// CHECK: @_ZL1A = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1B = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1C = internal global %"class.hlsl::RWBuffer" poison, align 4
+
+RWBuffer<float> A : register(u1);
+RWBuffer<float> B : register(u2);
+RWBuffer<float> C : register(u3);
+RWStructuredBuffer<float> Out : register(u0);
+
+// CHECK: define internal void @_Z4mainv()
+// CHECK-NEXT: entry:
+[numthreads(4,1,1)]
+void main() {
+// CHECK-NEXT: %First = alloca [3 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: %Second = alloca [4 x %"class.hlsl::RWBuffer"], align 4
+ RWBuffer<float> First[3] = { A, B, C };
+ RWBuffer<float> Second[4];
+
+// Verify initialization of First array from an initialization list
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %First, ptr align 4 @_ZL1A, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr1:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %First, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr1]], ptr align 4 @_ZL1B, i32 4, i1 false)
+// CHECK-NEXT: %[[Ptr2:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %First, i32 2
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr2]], ptr align 4 @_ZL1C, i32 4, i1 false)
+
+// Verify default initialization of Second array, which means there is a loop iterating
+// over the array elements and calling the default constructor for each
+// CHECK-NEXT: %[[ArrayBeginPtr:.*]] = getelementptr inbounds [4 x %"class.hlsl::RWBuffer"], ptr %Second, i32 0, i32 0
+// CHECK-NEXT: %[[ArrayEndPtr:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %[[ArrayBeginPtr]], i32 4
+// CHECK-NEXT: br label %[[ArrayInitLoop:.*]]
+// CHECK: [[ArrayInitLoop]]:
+// CHECK-NEXT: %[[ArrayCurPtr:.*]] = phi ptr [ %[[ArrayBeginPtr]], %entry ], [ %[[ArrayNextPtr:.*]], %[[ArrayInitLoop]] ]
+// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1Ev(ptr {{.*}} %[[ArrayCurPtr]])
+// CHECK-NEXT: %[[ArrayNextPtr]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %[[ArrayCurPtr]], i32 1
+// CHECK-NEXT: %[[ArrayInitDone:.*]] = icmp eq ptr %[[ArrayNextPtr]], %[[ArrayEndPtr]]
+// CHECK-NEXT: br i1 %[[ArrayInitDone]], label %[[AfterArrayInit:.*]], label %[[ArrayInitLoop]]
+// CHECK: [[AfterArrayInit]]:
+
+// Initialize First[2] with C
+// CHECK: %[[Ptr3:.*]] = getelementptr inbounds [4 x %"class.hlsl::RWBuffer"], ptr %Second, i32 0, i32 2
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Ptr3]], ptr align 4 @_ZL1C, i32 4, i1 false)
+ Second[2] = C;
+
+ // NOTE: _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float>
+
+// get First[1][0] value
+// CHECK: %[[First_1_Ptr:.*]] = getelementptr inbounds [3 x %"class.hlsl::RWBuffer"], ptr %First, i32 0, i32 1
+// CHECK: %[[BufPtr1:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[First_1_Ptr]], i32 noundef 0)
+// CHECK: %[[Value1:.*]] = load float, ptr %[[BufPtr1]], align 4
+
+// get Second[2][0] value
+// CHECK: %[[Second_2_Ptr:.*]] = getelementptr inbounds [4 x %"class.hlsl::RWBuffer"], ptr %Second, i32 0, i32 2
+// CHECK: %[[BufPtr2:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Second_2_Ptr]], i32 noundef 0)
+// CHECK: %[[Value2:.*]] = load float, ptr %[[BufPtr2]], align 4
+
+// add them
+// CHECK: %{{.*}} = fadd {{.*}} float %[[Value1]], %[[Value2]]
+ Out[0] = First[1][0] + Second[2][0];
+}
diff --git a/clang/test/CodeGenHLSL/resources/res-array-local2.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local2.hlsl
new file mode 100644
index 0000000000000..39f3aeb66ceb5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local2.hlsl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies handling of local arrays of resources when used as a function argument.
+
+// CHECK: @_ZL1A = internal global [3 x %"class.hlsl::RWBuffer"] poison, align 4
+
+RWBuffer<float> A[3] : register(u0);
+RWStructuredBuffer<float> Out : register(u0);
+
+// NOTE: _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> and
+// _ZN4hlsl18RWStructuredBufferIfEixEj is the subscript operator for RWStructuredBuffer<float>
+
+// CHECK: define {{.*}} float @_Z3fooA3_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x %"class.hlsl::RWBuffer"]) align 4 %LocalA)
+// CHECK-NEXT: entry:
+float foo(RWBuffer<float> LocalA[3]) {
+// CHECK-NEXT: %[[LocalA_2_Ptr:.*]] = getelementptr inbounds [3 x %"class.hlsl::RWBuffer"], ptr %LocalA, i32 0, i32 2
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[LocalA_2_Ptr]], i32 noundef 0)
+// CHECK-NEXT: %[[Value:.*]] = load float, ptr %[[BufPtr]], align 4
+// CHECK-NEXT: ret float %[[Value]]
+ return LocalA[2][0];
+}
+
+// CHECK: define internal void @_Z4mainv()
+// CHECK-NEXT: entry:
+[numthreads(4,1,1)]
+void main() {
+// Check that the `main` function calls `foo` with a local copy of the array
+// CHECK-NEXT: %[[Tmp:.*]] = alloca [3 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp]], ptr align 4 @_ZL1A, i32 12, i1 false)
+
+// CHECK-NEXT: %[[ReturnedValue:.*]] = call {{.*}} float @_Z3fooA3_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp]])
+// CHECK-NEXT: %[[OutBufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIfEixEj(ptr {{.*}} @_ZL3Out, i32 noundef 0)
+// CHECK-NEXT: store float %[[ReturnedValue]], ptr %[[OutBufPtr]], align 4
+// CHECK-NEXT: ret void
+ Out[0] = foo(A);
+}
diff --git a/clang/test/CodeGenHLSL/resources/res-array-local3.hlsl b/clang/test/CodeGenHLSL/resources/res-array-local3.hlsl
new file mode 100644
index 0000000000000..e5bcdc651254f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/res-array-local3.hlsl
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
+// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// This test verifies handling of local arrays of resources when used
+// as a function argument that is modified inside the function.
+
+// CHECK: @_ZL1X = internal global %"class.hlsl::RWBuffer" poison, align 4
+// CHECK: @_ZL1Y = internal global %"class.hlsl::RWBuffer" poison, align 4
+
+RWBuffer<int> X : register(u0);
+RWBuffer<int> Y : register(u1);
+
+// CHECK: define {{.*}} @_Z6SomeFnA2_N4hlsl8RWBufferIiEEji(
+// CHECK-SAME: ptr noundef byval([2 x %"class.hlsl::RWBuffer"]) align 4 %B, i32 noundef %Idx, i32 noundef %Val0)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %[[Idx_addr:.*]] = alloca i32, align 4
+// CHECK-NEXT: %[[Val0_addr:.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 %Idx, ptr %[[Idx_addr]], align 4
+// CHECK-NEXT: store i32 %Val0, ptr %[[Val0_addr]], align 4
+void SomeFn(RWBuffer<int> B[2], uint Idx, int Val0) {
+
+// CHECK-NEXT: %[[B_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %B, i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[B_0_Ptr]], ptr align 4 @_ZL1Y, i32 4, i1 false)
+ B[0] = Y;
+
+// NOTE: _ZN4hlsl8RWBufferIiEixEj is the subscript operator for RWBuffer<int>
+
+// CHECK-NEXT: %[[Val0:.*]] = load i32, ptr %[[Val0_addr]], align 4
+// CHECK-NEXT: %[[B_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %B, i32 0, i32 0
+// CHECK-NEXT: %[[Idx:.*]] = load i32, ptr %[[Idx_addr]], align 4
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIiEixEj(ptr {{.*}} %[[B_0_Ptr]], i32 noundef %[[Idx]])
+// CHECK-NEXT: store i32 %[[Val0]], ptr %[[BufPtr]], align 4
+ B[0][Idx] = Val0;
+}
+
+// CHECK: define {{.*}} void @_Z4mainj(i32 noundef %GI)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %[[GI_addr:.*]] = alloca i32, align 4
+[numthreads(4,1,1)]
+void main(uint GI : SV_GroupIndex) {
+// CHECK-NEXT: %A = alloca [2 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: %[[Tmp:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4
+// CHECK-NEXT: store i32 %GI, ptr %GI.addr, align 4
+
+// Initialization of array A with resources X and Y
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %A, ptr align 4 @_ZL1X, i32 4, i1 false)
+// CHECK-NEXT: %[[A_1_Ptr:.*]] = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %A, i32 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[A_1_Ptr]], ptr align 4 @_ZL1Y, i32 4, i1 false)
+ RWBuffer<int> A[2] = {X, Y};
+
+// Verify that SomeFn is called with a local copy of the array A
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp]], ptr align 4 %A, i32 8, i1 false)
+// CHECK-NEXT: %[[GI:.*]] = load i32, ptr %[[GI_addr]], align 4
+// CHECK-NEXT: call void @_Z6SomeFnA2_N4hlsl8RWBufferIiEEji(ptr noundef byval([2 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp]], i32 noundef %[[GI]], i32 noundef 1)
+ SomeFn(A, GI, 1);
+
+// CHECK-NEXT: %[[A_0_Ptr:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %A, i32 0, i32 0
+// CHECK-NEXT: %[[GI:.*]] = load i32, ptr %[[GI_addr]], align 4
+// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIiEixEj(ptr {{.*}} %[[A_0_Ptr]], i32 noundef %[[GI]])
+// CHECK-NEXT: store i32 2, ptr %[[BufPtr]], align 4
+ A[0][GI] = 2;
+}
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/10/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/11/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu/12/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/10/include/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/11/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtbegin.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/crtend.o
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/include/c++/.keep b/clang/test/Driver/Inputs/gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu/12/include/c++/.keep
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/gcc-toolchain-libstdcxx.cpp b/clang/test/Driver/gcc-toolchain-libstdcxx.cpp
deleted file mode 100644
index 6ec1674500022..0000000000000
--- a/clang/test/Driver/gcc-toolchain-libstdcxx.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// UNSUPPORTED: system-windows
-
-// This file tests that the GCC installation directory detection takes
-// the libstdc++ includes into account. In each directory
-// Inputs/gcc_toolchain_libstdcxx/gccX, the installation directory for
-// gcc X should be picked in the future since it is the directory with
-// the largest version number which also contains the libstdc++
-// include directory.
-
-// RUN: %clang --gcc-toolchain=%S/Inputs/gcc_toolchain_libstdcxx/gcc10/usr -v 2>&1 |& FileCheck %s --check-prefix=GCC10
-// GCC10: clang: warning: future releases of the clang compiler will prefer GCC installations containing libstdc++ include directories; '[[PREFIX:.*gcc_toolchain_libstdcxx/gcc10/usr/lib/gcc/x86_64-linux-gnu]]/10' would be chosen over '[[PREFIX]]/12' [-Wgcc-install-dir-libstdcxx]
-// GCC10: Found candidate GCC installation: [[PREFIX]]/10
-// GCC10: Found candidate GCC installation: [[PREFIX]]/11
-// GCC10: Found candidate GCC installation: [[PREFIX]]/12
-// GCC10: Selected GCC installation: [[PREFIX]]/12
-
-// RUN: %clang --gcc-toolchain=%S/Inputs/gcc_toolchain_libstdcxx/gcc11/usr -v 2>&1 |& FileCheck %s --check-prefix=ONLY_GCC11
-// ONLY_GCC11: clang: warning: future releases of the clang compiler will prefer GCC installations containing libstdc++ include directories; '[[PREFIX:.*gcc_toolchain_libstdcxx/gcc11/usr/lib/gcc/x86_64-linux-gnu]]/11' would be chosen over '[[PREFIX]]/12' [-Wgcc-install-dir-libstdcxx]
-// ONLY_GCC11: Found candidate GCC installation: [[PREFIX]]/10
-// ONLY_GCC11: Found candidate GCC installation: [[PREFIX]]/11
-// ONLY_GCC11: Found candidate GCC installation: [[PREFIX]]/12
-// ONLY_GCC11: Selected GCC installation: [[PREFIX]]/12
-
-// RUN: %clang --gcc-toolchain=%S/Inputs/gcc_toolchain_libstdcxx/gcc12/usr -v 2>&1 |& FileCheck %s --check-prefix=GCC12
-// GCC12: Found candidate GCC installation: [[PREFIX:.*gcc_toolchain_libstdcxx/gcc12/usr/lib/gcc/x86_64-linux-gnu]]/10
-// GCC12: Found candidate GCC installation: [[PREFIX]]/11
-// GCC12: Found candidate GCC installation: [[PREFIX]]/12
-// GCC12: Selected GCC installation: [[PREFIX]]/12
diff --git a/lld/test/wasm/pie.s b/lld/test/wasm/pie.s
index 433958e149d8f..21eac79207318 100644
--- a/lld/test/wasm/pie.s
+++ b/lld/test/wasm/pie.s
@@ -2,7 +2,7 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten %S/Inputs/internal_func.s -o %t.internal_func.o
# RUN: wasm-ld --no-gc-sections --experimental-pic -pie --unresolved-symbols=import-dynamic -o %t.wasm %t.o %t.internal_func.o
# RUN: obj2yaml %t.wasm | FileCheck %s
-# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_apply_global_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM
+# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM
.functype external_func () -> ()
.functype internal_func1 () -> (i32)
@@ -41,9 +41,6 @@ foo:
drop
global.get __stack_pointer
- drop
-
- global.get __wasm_first_page_end at GOT
end_function
get_data_address:
@@ -148,18 +145,6 @@ _start:
# DISASSEM-LABEL: <__wasm_apply_data_relocs>:
# DISASSEM: end
-# global 6 is __wasm_first_page_end, which is ABSOLUTE and
-# thus should not be relocated.
-#
-# DISASSEM-LABEL: <__wasm_apply_global_relocs>:
-# DISASSEM: global.set 4
-# DISASSEM: global.set 5
-# DISASSEM-NOT: global.set 6
-# DISASSEM: global.set 7
-# DISASSEM: global.set 8
-# DISASSEM: global.set 9
-# DISASSEM: end
-
# Run the same test with extended-const support. When this is available
# we don't need __wasm_apply_global_relocs and instead rely on the add
# instruction in the InitExpr. We also, therefore, do not need these globals
@@ -188,23 +173,17 @@ _start:
# EXTENDED-CONST-NEXT: Type: I32
# EXTENDED-CONST-NEXT: Mutable: false
# EXTENDED-CONST-NEXT: InitExpr:
-# EXTENDED-CONST-NEXT: Opcode: I32_CONST
-# EXTENDED-CONST-NEXT: Value: 65536
-# EXTENDED-CONST-NEXT: - Index: 7
-# EXTENDED-CONST-NEXT: Type: I32
-# EXTENDED-CONST-NEXT: Mutable: false
-# EXTENDED-CONST-NEXT: InitExpr:
# EXTENDED-CONST-NEXT: Extended: true
# This instruction sequence decodes to:
# (global.get[0x23] 0x1 i32.const[0x41] 0x0C i32.add[0x6A] end[0x0b])
# EXTENDED-CONST-NEXT: Body: 2301410C6A0B
-# EXTENDED-CONST-NEXT: - Index: 8
+# EXTENDED-CONST-NEXT: - Index: 7
# EXTENDED-CONST-NEXT: Type: I32
# EXTENDED-CONST-NEXT: Mutable: false
# EXTENDED-CONST-NEXT: InitExpr:
# EXTENDED-CONST-NEXT: Opcode: GLOBAL_GET
# EXTENDED-CONST-NEXT: Index: 2
-# EXTENDED-CONST-NEXT: - Index: 9
+# EXTENDED-CONST-NEXT: - Index: 8
# EXTENDED-CONST-NEXT: Type: I32
# EXTENDED-CONST-NEXT: Mutable: false
# EXTENDED-CONST-NEXT: InitExpr:
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index e26f818fc2ca0..e1192706ea913 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -440,8 +440,6 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const {
: WASM_OPCODE_I32_ADD;
for (const Symbol *sym : internalGotSymbols) {
- if (sym->flags & WASM_SYMBOL_ABSOLUTE)
- continue;
if (TLS != sym->isTLS())
continue;
@@ -505,8 +503,7 @@ void GlobalSection::writeBody() {
bool useExtendedConst = false;
uint32_t globalIdx;
int64_t offset;
- if (ctx.arg.extendedConst && ctx.isPic &&
- (sym->flags & WASM_SYMBOL_ABSOLUTE) == 0) {
+ if (ctx.arg.extendedConst && ctx.isPic) {
if (auto *d = dyn_cast<DefinedData>(sym)) {
if (!sym->isTLS()) {
globalIdx = ctx.sym.memoryBase->getGlobalIndex();
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 2445005bf98ce..520ce4deb9a57 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1027,9 +1027,16 @@ static inline bool isAddLike(const SDValue V) {
(V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
}
+static SDValue stripAssertAlign(SDValue N) {
+ if (N.getOpcode() == ISD::AssertAlign)
+ N = N.getOperand(0);
+ return N;
+}
+
// selectBaseADDR - Match a dag node which will serve as the base address for an
// ADDR operand pair.
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
+ N = stripAssertAlign(N);
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
GA->getValueType(0), GA->getOffset(),
@@ -1044,6 +1051,7 @@ static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
}
static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
+ Addr = stripAssertAlign(Addr);
APInt AccumulatedOffset(64u, 0);
while (isAddLike(Addr)) {
const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
@@ -1055,7 +1063,7 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
break;
AccumulatedOffset += CI;
- Addr = Addr->getOperand(0);
+ Addr = stripAssertAlign(Addr->getOperand(0));
}
return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
MVT::i32);
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index f4362fe8d9056..e2bbe57c0085c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -412,6 +412,22 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
}
}
+// Create a call to the nvvm_internal_addrspace_wrap intrinsic and set the
+// alignment of the return value based on the alignment of the argument.
+static CallInst *createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB,
+ Argument &Arg) {
+ CallInst *ArgInParam =
+ IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
+ {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()},
+ &Arg, {}, Arg.getName() + ".param");
+
+ if (MaybeAlign ParamAlign = Arg.getParamAlign())
+ ArgInParam->addRetAttr(
+ Attribute::getWithAlignment(ArgInParam->getContext(), *ParamAlign));
+
+ return ArgInParam;
+}
+
namespace {
struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
using Base = PtrUseVisitor<ArgUseChecker>;
@@ -515,10 +531,7 @@ void copyByValParam(Function &F, Argument &Arg) {
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
Arg.replaceAllUsesWith(AllocA);
- Value *ArgInParam =
- IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
- {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()},
- &Arg, {}, Arg.getName());
+ CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);
// Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
// addrspacecast preserves alignment. Since params are constant, this load
@@ -549,9 +562,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
SmallVector<Use *, 16> UsesToUpdate(llvm::make_pointer_range(Arg->uses()));
IRBuilder<> IRB(&*FirstInst);
- Value *ArgInParamAS = IRB.CreateIntrinsic(
- Intrinsic::nvvm_internal_addrspace_wrap,
- {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()}, {Arg});
+ CallInst *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg);
for (Use *U : UsesToUpdate)
convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant);
@@ -581,10 +592,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
// argument already in the param address space, we need to use the noop
// intrinsic, this had the added benefit of preventing other optimizations
// from folding away this pair of addrspacecasts.
- auto *ParamSpaceArg =
- IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
- {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()},
- Arg, {}, Arg->getName() + ".param");
+ auto *ParamSpaceArg = createNVVMInternalAddrspaceWrap(IRB, *Arg);
// Cast param address to generic address space.
Value *GenericArg = IRB.CreateAddrSpaceCast(
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3c19cea2bd1dd..4a1db80076530 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20759,12 +20759,22 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
isNullConstant(Src.getOperand(1)) &&
Src.getOperand(0).getValueType().isScalableVector()) {
EVT VT = N->getValueType(0);
- EVT SrcVT = Src.getOperand(0).getValueType();
- assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
+ SDValue EVSrc = Src.getOperand(0);
+ EVT EVSrcVT = EVSrc.getValueType();
+ assert(EVSrcVT.getVectorElementType() == VT.getVectorElementType());
// Widths match, just return the original vector.
- if (SrcVT == VT)
- return Src.getOperand(0);
- // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
+ if (EVSrcVT == VT)
+ return EVSrc;
+ SDLoc DL(N);
+ // Width is narrower, using insert_subvector.
+ if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ EVSrc,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ }
+ // Width is wider, using extract_subvector.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
[[fallthrough]];
}
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index b46f4829605a1..f6333d68a8ea5 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS
llvm-objdump
llvm-opt-fuzzer
llvm-opt-report
+ llvm-offload-wrapper
llvm-otool
llvm-pdbutil
llvm-profdata
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
new file mode 100644
index 0000000000000..2051f6305cc03
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=nvptx-lower-args,infer-alignment -S | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+; ------------------------------------------------------------------------------
+; Test that alignment can be inferred through llvm.nvvm.internal.addrspace.wrap.p101.p0 intrinsics
+; thanks to the alignment attribute on the intrinsic
+; ------------------------------------------------------------------------------
+
+%struct.S1 = type { i32, i32, i32, i32 }
+define ptx_kernel i32 @test_align8(ptr noundef readonly byval(%struct.S1) align 8 captures(none) %params) {
+; CHECK-LABEL: define ptx_kernel i32 @test_align8(
+; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 8 captures(none) [[PARAMS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 8
+; CHECK-NEXT: ret i32 [[LOAD]]
+;
+entry:
+ %load = load i32, ptr %params, align 4
+ ret i32 %load
+}
+
+define ptx_kernel i32 @test_align1(ptr noundef readonly byval(%struct.S1) align 1 captures(none) %params) {
+; CHECK-LABEL: define ptx_kernel i32 @test_align1(
+; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 4 captures(none) [[PARAMS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call align 1 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 4
+; CHECK-NEXT: ret i32 [[LOAD]]
+;
+entry:
+ %load = load i32, ptr %params, align 4
+ ret i32 %load
+}
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 045704bdcd3fc..f5df0fcde1883 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -72,7 +72,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INPUT11:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
@@ -101,7 +101,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INPUT1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
@@ -137,7 +137,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
; OPT-NEXT: ret void
@@ -180,9 +180,9 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
+; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
-; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -208,7 +208,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
; OPT-NEXT: ret void
@@ -235,7 +235,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
@@ -357,7 +357,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -416,7 +416,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
-; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -471,7 +471,7 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
-; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -520,7 +520,7 @@ declare void @device_func(ptr byval(i32) align 4)
define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INPUT_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; OPT-NEXT: [[INPUT_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr
; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]])
; OPT-NEXT: ret void
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 7c029ab516d6e..b4a51035c6610 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -200,7 +200,7 @@ define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
; IRC-LABEL: define ptx_kernel void @ptr_as_int_aggr(
; IRC-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
-; IRC-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; IRC-NEXT: [[S3:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; IRC-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S3]], align 8
; IRC-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
; IRC-NEXT: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
@@ -210,7 +210,7 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
;
; IRO-LABEL: define ptx_kernel void @ptr_as_int_aggr(
; IRO-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
-; IRO-NEXT: [[S1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; IRO-NEXT: [[S1:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; IRO-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S1]], align 8
; IRO-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
; IRO-NEXT: store i32 [[V]], ptr [[P]], align 4
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 20a35198c3c16..4d36ff9496ede 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -32,7 +32,7 @@ define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only(
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
-; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4
; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4
; LOWER-ARGS-NEXT: ret void
@@ -66,7 +66,7 @@ define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only_gep(
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
-; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; LOWER-ARGS-NEXT: [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4
@@ -128,7 +128,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; COMMON-NEXT: [[ENTRY:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR6:[0-9]+]]
; COMMON-NEXT: ret void
@@ -167,7 +167,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COMMON-NEXT: [[ENTRY:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR6]]
@@ -209,7 +209,7 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COMMON-NEXT: [[ENTRY:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: store ptr [[S1]], ptr [[OUT]], align 8
; COMMON-NEXT: ret void
@@ -246,7 +246,7 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COMMON-NEXT: [[ENTRY:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
; COMMON-NEXT: store ptr [[B]], ptr [[OUT]], align 8
@@ -286,7 +286,7 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COMMON-NEXT: [[ENTRY:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S1]] to i64
; COMMON-NEXT: store i64 [[I]], ptr [[OUT]], align 8
@@ -324,7 +324,7 @@ define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeo
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
-; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true)
; LOWER-ARGS-NEXT: ret void
;
@@ -445,7 +445,7 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COMMON-NEXT: [[ENTRY:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true)
; COMMON-NEXT: ret void
@@ -525,7 +525,7 @@ define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %
; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COMMON-NEXT: [[BB:.*:]]
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
+; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4
; COMMON-NEXT: store i32 [[I]], ptr [[S1]], align 4
@@ -551,7 +551,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
-; SM_60-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
@@ -563,7 +563,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
; SM_70-NEXT: [[BB:.*:]]
; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
-; SM_70-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; SM_70-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
@@ -577,7 +577,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
; COPY-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
-; COPY-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
; COPY-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
; COPY-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
@@ -637,7 +637,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
; COMMON-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
-; COMMON-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COMMON-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
@@ -682,7 +682,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
-; SM_60-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; SM_60: [[FIRST]]:
@@ -702,7 +702,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
; SM_70-NEXT: [[BB:.*:]]
; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
-; SM_70-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; SM_70-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; SM_70: [[FIRST]]:
@@ -724,7 +724,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT23]], ptr addrspace(101) align 8 [[INPUT24]], i64 8, i1 false)
; COPY-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
-; COPY-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
; COPY-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; COPY: [[FIRST]]:
@@ -808,7 +808,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
; COMMON-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
-; COMMON-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
+; COMMON-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; COMMON: [[FIRST]]:
@@ -871,7 +871,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg(
; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
; COMMON-NEXT: [[INPUT1:%.*]] = alloca i32, align 4
-; COMMON-NEXT: [[INPUT2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
+; COMMON-NEXT: [[INPUT2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false)
; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]])
; COMMON-NEXT: ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
new file mode 100644
index 0000000000000..da912bf401ec0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr='+v,+zvl512b' < %s | FileCheck %s
+
+define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x float> %arg2) {
+; CHECK-LABEL: redundant_vfmv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfredusum.vs v9, v12, v8
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v16, v8
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vrgather.vi v8, v9, 0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: ret
+ %s0 = extractelement <2 x float> %arg0, i64 0
+ %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)
+ %a0 = insertelement <2 x float> poison, float %r0, i64 0
+ %s1 = extractelement <2 x float> %arg0, i64 1
+ %r1 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s1, <64 x float> %arg2)
+ %a1 = insertelement <2 x float> %a0, float %r1, i64 1
+ ret <2 x float> %a1
+}
diff --git a/llvm/test/Other/offload-wrapper.ll b/llvm/test/Other/offload-wrapper.ll
new file mode 100644
index 0000000000000..9107a141ad201
--- /dev/null
+++ b/llvm/test/Other/offload-wrapper.ll
@@ -0,0 +1,52 @@
+; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP
+
+; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
+; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
+; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin"
+; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+; HIP-NEXT: @.hip.binary_handle = internal global ptr null
+; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
+
+; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+; HIP-NEXT: entry:
+; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
+; HIP-NEXT: call void @.hip.globals_reg(ptr %0)
+; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+; HIP-NEXT: ret void
+; HIP-NEXT: }
+
+; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+; HIP-NEXT: entry:
+; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
+; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
+; HIP-NEXT: ret void
+; HIP-NEXT: }
+
+; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc
+; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
+
+; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
+; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
+; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin"
+; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
+; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
+; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
+
+; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
+; CUDA-NEXT: entry:
+; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
+; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
+; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
+; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
+; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
+; CUDA-NEXT: ret void
+; CUDA-NEXT: }
+
+; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
+; CUDA-NEXT: entry:
+; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
+; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
+; CUDA-NEXT: ret void
+; CUDA-NEXT: }
diff --git a/llvm/tools/llvm-offload-wrapper/CMakeLists.txt b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt
new file mode 100644
index 0000000000000..18acbe2caaded
--- /dev/null
+++ b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS
+ BitWriter
+ Core
+ Object
+ Option
+ FrontendOffloading
+ Support
+ TargetParser
+ )
+
+add_llvm_tool(llvm-offload-wrapper
+ llvm-offload-wrapper.cpp
+
+ DEPENDS
+ intrinsics_gen
+ )
diff --git a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
new file mode 100644
index 0000000000000..9dac1646b1e26
--- /dev/null
+++ b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
@@ -0,0 +1,135 @@
+//===- llvm-offload-wrapper: Create runtime registration code for devices -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides a utility for generating runtime registration code for device code.
+// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and
+// create a new IR module that calls the respective runtime to load it on the
+// device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Frontend/Offloading/OffloadWrapper.h"
+#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Host.h"
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+static cl::OptionCategory
+ OffloadWrapeprCategory("llvm-offload-wrapper options");
+
+static cl::opt<object::OffloadKind> Kind(
+ "kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory),
+ cl::Required,
+ cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"),
+ clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"),
+ clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries")));
+
+static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
+ cl::value_desc("file"),
+ cl::cat(OffloadWrapeprCategory));
+
+static cl::list<std::string> InputFiles(cl::Positional,
+ cl::desc("Wrap input from <file>"),
+ cl::value_desc("file"), cl::OneOrMore,
+ cl::cat(OffloadWrapeprCategory));
+
+static cl::opt<std::string>
+ TheTriple("triple", cl::desc("Target triple for the wrapper module"),
+ cl::init(sys::getDefaultTargetTriple()),
+ cl::cat(OffloadWrapeprCategory));
+
+static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) {
+ if (BuffersToWrap.size() > 1 &&
+ (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP))
+ return createStringError(
+ "CUDA / HIP offloading uses a single fatbinary or offload bundle");
+
+ LLVMContext Context;
+ Module M("offload.wrapper.module", Context);
+ M.setTargetTriple(Triple());
+
+ switch (Kind) {
+ case llvm::object::OFK_OpenMP:
+ if (Error Err = offloading::wrapOpenMPBinaries(
+ M, BuffersToWrap, offloading::getOffloadEntryArray(M),
+ /*Suffix=*/"", /*Relocatable=*/false))
+ return Err;
+ break;
+ case llvm::object::OFK_Cuda:
+ if (Error Err = offloading::wrapCudaBinary(
+ M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M),
+ /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
+ return Err;
+ break;
+ case llvm::object::OFK_HIP:
+ if (Error Err = offloading::wrapHIPBinary(
+ M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
+ return Err;
+ break;
+ default:
+ return createStringError(getOffloadKindName(Kind) +
+ " wrapping is not supported");
+ }
+
+ int FD = -1;
+ if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
+ return errorCodeToError(EC);
+ llvm::raw_fd_ostream OS(FD, true);
+ WriteBitcodeToFile(M, OS);
+
+ return Error::success();
+}
+
+int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
+ cl::HideUnrelatedOptions(OffloadWrapeprCategory);
+ cl::ParseCommandLineOptions(
+ argc, argv,
+ "Generate runtime registration code for a device binary image\n");
+
+ if (Help) {
+ cl::PrintHelpMessage();
+ return EXIT_SUCCESS;
+ }
+
+ auto ReportError = [argv](Error E) {
+ logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
+ exit(EXIT_FAILURE);
+ };
+
+ SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+ SmallVector<ArrayRef<char>> BuffersToWrap;
+ for (StringRef Input : InputFiles) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(Input);
+ if (std::error_code EC = BufferOrErr.getError())
+ ReportError(createFileError(Input, EC));
+ std::unique_ptr<MemoryBuffer> &Buffer =
+ Buffers.emplace_back(std::move(*BufferOrErr));
+ BuffersToWrap.emplace_back(
+ ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
+ }
+
+ if (Error Err = wrapImages(BuffersToWrap))
+ ReportError(std::move(Err));
+
+ return EXIT_SUCCESS;
+}
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index c0babb85f5c46..67a4c43455809 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -689,7 +689,7 @@ isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
// Set of regions of which the entry block domain has been propagated to BB.
// all predecessors inside any of the regions can be skipped.
- SmallSet<Region *, 8> PropagatedRegions;
+ SmallPtrSet<Region *, 8> PropagatedRegions;
for (auto *PredBB : predecessors(BB)) {
// Skip backedges.
More information about the llvm-branch-commits
mailing list