[clang] [libclc] [llvm] [libclc] Add initial LIT tests (PR #87989)
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 05:08:51 PDT 2025
https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/87989
>From b41b2032fdb01bd91d32255bf22a94315b58a017 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 30 Jun 2025 10:59:02 +0100
Subject: [PATCH 01/10] [libclc] Place libclc files in clang's resource dir
---
libclc/CMakeLists.txt | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index e2871d1b01a16..8bc3a75739fcd 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -63,6 +63,9 @@ if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DI
set( ${tool}_target )
endforeach()
endif()
+
+ # Setup the paths where libclc runtimes should be stored.
+ set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
else()
# In-tree configuration
set( LIBCLC_STANDALONE_BUILD FALSE )
@@ -82,10 +85,14 @@ else()
get_host_tool_path( llvm-link LLVM_LINK llvm-link_exe llvm-link_target )
get_host_tool_path( opt OPT opt_exe opt_target )
endif()
-endif()
-# Setup the paths where libclc runtimes should be stored.
-set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
+ # Setup the paths where libclc runtimes should be stored. By default, in an
+ # in-tree build we place the libraries in clang's resource driectory.
+ get_clang_resource_dir( LIBCLC_OUTPUT_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. )
+
+ # Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR.
+ set( LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_OUTPUT_DIR}/lib/libclc )
+endif()
if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} )
message( WARNING "Using custom LLVM tools to build libclc: "
>From 4057b6af7db11b59878739bff4d826a9fc616bbf Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Tue, 1 Jul 2025 10:56:54 +0100
Subject: [PATCH 02/10] [clang] Add the ability to link libclc OpenCL libraries
This commit adds driver support for linking libclc OpenCL libraries. It
takes the form of a new optional flag: --libclc-lib=namespec. Nothing is
linked unless this flag is specified.
Not all libclc targets have corresponding clang targets. For this reason
it is desirable for users to be able to specify a libclc library name.
We support this by taking both a library name (without the .bc suffix)
or a filename. Both of these are searched for in the clang resource
directory or in the LIBRARY_PATH environment variable. Filenames are
also checked themselves so that absolute paths can be provided. The
syntax for specifying filenames (as opposed to library names) uses a
leading colon (:), inspired by the -l option.
To accommodate this option, libclc libraries are now placed into clang's
resource directory in an in-tree configuration. The aliases are not
currently placed there to avoid polluting the directory, but that can be
changed. The libraries are all placed in <resource-dir>/lib/libclc and
are not grouped under host-specific directories as some other runtime
libraries are; it is not expected that OpenCL libraries will differ
depending on the host toolchain.
Currently only the AMDGPU toolchain supports this option as a proof of
concept. Other targets such as NVPTX or SPIR/SPIR-V could support it
too. We could optionally let target toolchains search for libclc
libraries themselves, possibly when passed an empty --libclc-lib.
---
.../clang/Basic/DiagnosticDriverKinds.td | 3 +
clang/include/clang/Driver/CommonArgs.h | 3 +
clang/include/clang/Driver/Options.td | 2 +
clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 +
clang/lib/Driver/ToolChains/CommonArgs.cpp | 59 +++++++++++++++++++
clang/test/Driver/Inputs/libclc/libclc.bc | 0
.../Driver/Inputs/libclc/subdir/libclc.bc | 0
clang/test/Driver/opencl-libclc.cl | 10 ++++
8 files changed, 79 insertions(+)
create mode 100644 clang/test/Driver/Inputs/libclc/libclc.bc
create mode 100644 clang/test/Driver/Inputs/libclc/subdir/libclc.bc
create mode 100644 clang/test/Driver/opencl-libclc.cl
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 34b6c0d7a8acd..019161c22a24f 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -392,6 +392,9 @@ def warn_drv_fraw_string_literals_in_cxx11 : Warning<
"ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">,
InGroup<UnusedCommandLineArgument>;
+def err_drv_libclc_not_found : Error<
+ "no libclc library '%0' found in the clang resource directory or in LIBRARY_PATH">;
+
def err_drv_invalid_malign_branch_EQ : Error<
"invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
diff --git a/clang/include/clang/Driver/CommonArgs.h b/clang/include/clang/Driver/CommonArgs.h
index 26aa3ccf84786..7e8ab82eb7863 100644
--- a/clang/include/clang/Driver/CommonArgs.h
+++ b/clang/include/clang/Driver/CommonArgs.h
@@ -215,6 +215,9 @@ void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs,
StringRef BitcodeSuffix, const llvm::Triple &Triple,
const ToolChain &HostTC);
+void addOpenCLBuiltinsLib(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args);
+
void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 54c71b066f9d4..cf24bcac9c07c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1425,6 +1425,8 @@ def openacc_macro_override_EQ
// End Clang specific/exclusive options for OpenACC.
+def libclc_lib_EQ : Joined<["--"], "libclc-lib=">, Group<opencl_Group>,
+ HelpText<"Namespec of libclc OpenCL bitcode library to link">;
def libomptarget_amdgpu_bc_path_EQ : Joined<["--"], "libomptarget-amdgpu-bc-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-amdgcn bitcode library">;
def libomptarget_amdgcn_bc_path_EQ : Joined<["--"], "libomptarget-amdgcn-bc-path=">, Group<i_Group>,
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index b7564a0495da8..e6d1baa2a1caa 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -795,6 +795,8 @@ void AMDGPUToolChain::addClangTargetOptions(
CC1Args.push_back("-fvisibility=hidden");
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
+
+ addOpenCLBuiltinsLib(getDriver(), DriverArgs, CC1Args);
}
void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index bdd77ac84913c..40aaa7ca324be 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2942,6 +2942,65 @@ void tools::addHIPRuntimeLibArgs(const ToolChain &TC, Compilation &C,
}
}
+void tools::addOpenCLBuiltinsLib(const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) {
+ // Check whether user specifies a libclc bytecode library
+ if (const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ)) {
+ SmallVector<StringRef, 8> LibraryPaths;
+
+ // Add user defined library paths from LIBRARY_PATH.
+ std::optional<std::string> LibPath =
+ llvm::sys::Process::GetEnv("LIBRARY_PATH");
+ if (LibPath) {
+ SmallVector<StringRef, 8> Frags;
+ const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
+ llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
+ for (StringRef Path : Frags)
+ LibraryPaths.emplace_back(Path.trim());
+ }
+
+ // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/
+ SmallString<128> LibclcPath(D.ResourceDir);
+ llvm::sys::path::append(LibclcPath, "lib", "libclc");
+ LibraryPaths.emplace_back(LibclcPath);
+
+ bool FoundBCLibrary = false;
+ StringRef LibclcNamespec(A->getValue());
+
+ // If the namespec is of the form :filename, search for that file.
+ bool FilenameSearch = LibclcNamespec.starts_with(":");
+ SmallString<128> LibclcTargetFile(
+ LibclcNamespec.drop_front(FilenameSearch ? 1 : 0));
+
+ if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
+ FoundBCLibrary = true;
+ CC1Args.push_back("-mlink-builtin-bitcode");
+ CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile));
+ } else {
+ // Search the library paths for the file
+ if (!FilenameSearch)
+ LibclcTargetFile += ".bc";
+
+ for (StringRef LibraryPath : LibraryPaths) {
+ SmallString<128> LibclcPath(LibraryPath);
+ llvm::sys::path::append(LibclcPath, LibclcTargetFile);
+ if (llvm::sys::fs::exists(LibclcPath)) {
+ FoundBCLibrary = true;
+ CC1Args.push_back("-mlink-builtin-bitcode");
+ CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath));
+ break;
+ }
+ }
+ }
+
+ // Since the user requested a library, if we haven't one then report an
+ // error.
+ if (!FoundBCLibrary)
+ D.Diag(diag::err_drv_libclc_not_found) << LibclcTargetFile;
+ }
+}
+
void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
diff --git a/clang/test/Driver/Inputs/libclc/libclc.bc b/clang/test/Driver/Inputs/libclc/libclc.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/libclc/subdir/libclc.bc b/clang/test/Driver/Inputs/libclc/subdir/libclc.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl
new file mode 100644
index 0000000000000..34b863b128682
--- /dev/null
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -0,0 +1,10 @@
+// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
+
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+
+// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc
+// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc
>From bde5f5325e17525820bc145abb1f4c43e358d59b Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Tue, 1 Jul 2025 12:26:22 +0100
Subject: [PATCH 03/10] adjust LIBRARY_PATH usage in tests
---
clang/test/Driver/opencl-libclc.cl | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl
index 34b863b128682..8608a4f741b23 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -1,10 +1,10 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc
// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc
>From 5496f2dd3b449895f8de880f937e6fe3c8f29cd8 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Tue, 1 Jul 2025 12:50:15 +0100
Subject: [PATCH 04/10] fix path checks
---
clang/test/Driver/opencl-libclc.cl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl
index 8608a4f741b23..b7780d1af8d40 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -6,5 +6,5 @@
// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
-// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc
-// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc
+// CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc
+// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc
>From df02a9e5a46206d5ed4880c293fbd0faf6728db2 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 7 Jul 2025 12:06:35 +0100
Subject: [PATCH 05/10] consume_front
---
clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 40aaa7ca324be..e6d9974d734bf 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2969,9 +2969,8 @@ void tools::addOpenCLBuiltinsLib(const Driver &D,
StringRef LibclcNamespec(A->getValue());
// If the namespec is of the form :filename, search for that file.
- bool FilenameSearch = LibclcNamespec.starts_with(":");
- SmallString<128> LibclcTargetFile(
- LibclcNamespec.drop_front(FilenameSearch ? 1 : 0));
+ bool FilenameSearch = LibclcNamespec.consume_front(":");
+ SmallString<128> LibclcTargetFile(LibclcNamespec);
if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
FoundBCLibrary = true;
>From 00b32eac8f3a06f9380a2cefeb20bef4e829d0fc Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 7 Jul 2025 12:23:10 +0100
Subject: [PATCH 06/10] remove logic for LIBRARY_PATH
---
clang/lib/Driver/ToolChains/CommonArgs.cpp | 67 ++++++++--------------
clang/test/Driver/opencl-libclc.cl | 7 +--
2 files changed, 24 insertions(+), 50 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index e6d9974d734bf..36f335154e6bc 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2946,57 +2946,36 @@ void tools::addOpenCLBuiltinsLib(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) {
// Check whether user specifies a libclc bytecode library
- if (const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ)) {
- SmallVector<StringRef, 8> LibraryPaths;
+ const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ);
+ if (!A)
+ return;
- // Add user defined library paths from LIBRARY_PATH.
- std::optional<std::string> LibPath =
- llvm::sys::Process::GetEnv("LIBRARY_PATH");
- if (LibPath) {
- SmallVector<StringRef, 8> Frags;
- const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
- llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
- for (StringRef Path : Frags)
- LibraryPaths.emplace_back(Path.trim());
- }
+ // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/
+ SmallString<128> LibclcPath(D.ResourceDir);
+ llvm::sys::path::append(LibclcPath, "lib", "libclc");
- // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/
- SmallString<128> LibclcPath(D.ResourceDir);
- llvm::sys::path::append(LibclcPath, "lib", "libclc");
- LibraryPaths.emplace_back(LibclcPath);
+ // If the namespec is of the form :filename, search for that file.
+ StringRef LibclcNamespec(A->getValue());
+ bool FilenameSearch = LibclcNamespec.consume_front(":");
+ SmallString<128> LibclcTargetFile(LibclcNamespec);
- bool FoundBCLibrary = false;
- StringRef LibclcNamespec(A->getValue());
-
- // If the namespec is of the form :filename, search for that file.
- bool FilenameSearch = LibclcNamespec.consume_front(":");
- SmallString<128> LibclcTargetFile(LibclcNamespec);
+ if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
+ CC1Args.push_back("-mlink-builtin-bitcode");
+ CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile));
+ } else {
+ // Search the library paths for the file
+ if (!FilenameSearch)
+ LibclcTargetFile += ".bc";
- if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
- FoundBCLibrary = true;
+ llvm::sys::path::append(LibclcPath, LibclcTargetFile);
+ if (llvm::sys::fs::exists(LibclcPath)) {
CC1Args.push_back("-mlink-builtin-bitcode");
- CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile));
+ CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath));
} else {
- // Search the library paths for the file
- if (!FilenameSearch)
- LibclcTargetFile += ".bc";
-
- for (StringRef LibraryPath : LibraryPaths) {
- SmallString<128> LibclcPath(LibraryPath);
- llvm::sys::path::append(LibclcPath, LibclcTargetFile);
- if (llvm::sys::fs::exists(LibclcPath)) {
- FoundBCLibrary = true;
- CC1Args.push_back("-mlink-builtin-bitcode");
- CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath));
- break;
- }
- }
- }
-
- // Since the user requested a library, if we haven't one then report an
- // error.
- if (!FoundBCLibrary)
+ // Since the user requested a library, if we haven't one then report an
+ // error.
D.Diag(diag::err_drv_libclc_not_found) << LibclcTargetFile;
+ }
}
}
diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl
index b7780d1af8d40..80c5d10550146 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -1,10 +1,5 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
-
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
// CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc
// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc
>From 1c16dbbd8225cb481bfd9a00fa61493a03478e3f Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 7 Jul 2025 12:29:04 +0100
Subject: [PATCH 07/10] fix diagnostic; add error test
---
clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 +--
clang/test/Driver/opencl-libclc.cl | 4 ++++
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 019161c22a24f..8d07ade73ec89 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -392,8 +392,7 @@ def warn_drv_fraw_string_literals_in_cxx11 : Warning<
"ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">,
InGroup<UnusedCommandLineArgument>;
-def err_drv_libclc_not_found : Error<
- "no libclc library '%0' found in the clang resource directory or in LIBRARY_PATH">;
+def err_drv_libclc_not_found : Error<"no libclc library '%0' found in the clang resource directory">;
def err_drv_invalid_malign_branch_EQ : Error<
"invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl
index 80c5d10550146..185690768c75b 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -1,5 +1,9 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: not %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/subdir/not-here.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-ERROR
+
// CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc
// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc
+
+// CHECK-ERROR: no libclc library{{.*}}not-here.bc' found in the clang resource directory
>From b83c111bba9bc4531a941170018060fdbf7de6d6 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Thu, 4 Apr 2024 17:49:13 +0100
Subject: [PATCH 08/10] [libclc] Add initial LIT tests
These tests aren't very meaningful and aren't immune to false positives,
but they do get the project building when running 'check-all' and so
enable libclc testing in CI.
---
libclc/CMakeLists.txt | 4 +
libclc/test/CMakeLists.txt | 35 +++
libclc/test/add_sat.cl | 11 -
libclc/test/as_type.cl | 11 -
libclc/test/convert.cl | 11 -
libclc/test/cos.cl | 11 -
libclc/test/cross.cl | 11 -
libclc/test/fabs.cl | 11 -
libclc/test/geometric/cross.cl | 51 +++++
libclc/test/get_group_id.cl | 11 -
libclc/test/integer/add_sat.cl | 32 +++
libclc/test/integer/sub_sat.cl | 72 +++++++
libclc/test/lit.cfg.py | 44 ++++
libclc/test/lit.site.cfg.py.in | 23 ++
libclc/test/math/cos.cl | 296 ++++++++++++++++++++++++++
libclc/test/math/fabs.cl | 32 +++
libclc/test/math/rsqrt.cl | 48 +++++
libclc/test/misc/as_type.cl | 31 +++
libclc/test/misc/convert.cl | 32 +++
libclc/test/rsqrt.cl | 14 --
libclc/test/subsat.cl | 27 ---
libclc/test/work-item/get_group_id.cl | 33 +++
22 files changed, 733 insertions(+), 118 deletions(-)
create mode 100644 libclc/test/CMakeLists.txt
delete mode 100644 libclc/test/add_sat.cl
delete mode 100644 libclc/test/as_type.cl
delete mode 100644 libclc/test/convert.cl
delete mode 100644 libclc/test/cos.cl
delete mode 100644 libclc/test/cross.cl
delete mode 100644 libclc/test/fabs.cl
create mode 100644 libclc/test/geometric/cross.cl
delete mode 100644 libclc/test/get_group_id.cl
create mode 100644 libclc/test/integer/add_sat.cl
create mode 100644 libclc/test/integer/sub_sat.cl
create mode 100644 libclc/test/lit.cfg.py
create mode 100644 libclc/test/lit.site.cfg.py.in
create mode 100644 libclc/test/math/cos.cl
create mode 100644 libclc/test/math/fabs.cl
create mode 100644 libclc/test/math/rsqrt.cl
create mode 100644 libclc/test/misc/as_type.cl
create mode 100644 libclc/test/misc/convert.cl
delete mode 100644 libclc/test/rsqrt.cl
delete mode 100644 libclc/test/subsat.cl
create mode 100644 libclc/test/work-item/get_group_id.cl
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 8bc3a75739fcd..673309c88e2cd 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -494,3 +494,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
)
endforeach( d )
endforeach( t )
+
+if( NOT LIBCLC_STANDALONE_BUILD )
+ add_subdirectory( test )
+endif()
diff --git a/libclc/test/CMakeLists.txt b/libclc/test/CMakeLists.txt
new file mode 100644
index 0000000000000..653b39c2821a7
--- /dev/null
+++ b/libclc/test/CMakeLists.txt
@@ -0,0 +1,35 @@
+set( LIBCLC_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
+
+set( LIBCLC_TEST_TARGETS_ALL
+ amdgcn-mesa-mesa3d
+)
+
+foreach( target IN LISTS LIBCLC_TEST_TARGETS_ALL )
+ # If we haven't built this libclc target, don't build the tests
+ if( NOT TARGET prepare-${target} )
+ message( WARNING "libclc tests require target ${target}. Tests will not be built" )
+ # Add a dummy target
+ add_custom_target( check-libclc )
+ return()
+ endif()
+
+ list( APPEND LIBCLC_TEST_DEPS prepare-${target} )
+endforeach()
+
+list( APPEND LIBCLC_TEST_DEPS
+ ${clang_target}
+ FileCheck
+)
+
+configure_lit_site_cfg(
+ ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+ ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+ MAIN_CONFIG
+ ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+)
+
+add_lit_testsuite( check-libclc
+ "Running libclc regression tests"
+ ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS ${LIBCLC_TEST_DEPS}
+)
diff --git a/libclc/test/add_sat.cl b/libclc/test/add_sat.cl
deleted file mode 100644
index 87c3d39df3542..0000000000000
--- a/libclc/test/add_sat.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(__global char *a, __global char *b, __global char *c) {
- *a = add_sat(*b, *c);
-}
diff --git a/libclc/test/as_type.cl b/libclc/test/as_type.cl
deleted file mode 100644
index a926f48c4ea0c..0000000000000
--- a/libclc/test/as_type.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(int4 *x, float4 *y) {
- *x = as_int4(*y);
-}
diff --git a/libclc/test/convert.cl b/libclc/test/convert.cl
deleted file mode 100644
index 8eba608dc5f8c..0000000000000
--- a/libclc/test/convert.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(int4 *x, float4 *y) {
- *x = convert_int4(*y);
-}
diff --git a/libclc/test/cos.cl b/libclc/test/cos.cl
deleted file mode 100644
index 92a998b3ba5f7..0000000000000
--- a/libclc/test/cos.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(float4 *f) {
- *f = cos(*f);
-}
diff --git a/libclc/test/cross.cl b/libclc/test/cross.cl
deleted file mode 100644
index 90762d0d073a6..0000000000000
--- a/libclc/test/cross.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(float4 *f) {
- *f = cross(f[0], f[1]);
-}
diff --git a/libclc/test/fabs.cl b/libclc/test/fabs.cl
deleted file mode 100644
index 3f5a964e0418a..0000000000000
--- a/libclc/test/fabs.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(float *f) {
- *f = fabs(*f);
-}
diff --git a/libclc/test/geometric/cross.cl b/libclc/test/geometric/cross.cl
new file mode 100644
index 0000000000000..4cb8c53bea5ee
--- /dev/null
+++ b/libclc/test/geometric/cross.cl
@@ -0,0 +1,51 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[F]], i64 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
+// CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = fneg float [[TMP5]]
+// CHECK-NEXT: [[NEG_I_I:%.*]] = fmul float [[TMP4]], [[TMP6]]
+// CHECK-NEXT: [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP2]], float [[TMP3]], float [[NEG_I_I]])
+// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP10:%.*]] = fneg float [[TMP3]]
+// CHECK-NEXT: [[NEG3_I_I:%.*]] = fmul float [[TMP9]], [[TMP10]]
+// CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP4]], float [[TMP8]], float [[NEG3_I_I]])
+// CHECK-NEXT: [[TMP12:%.*]] = fneg float [[TMP8]]
+// CHECK-NEXT: [[NEG6_I_I:%.*]] = fmul float [[TMP2]], [[TMP12]]
+// CHECK-NEXT: [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP5]], float [[NEG6_I_I]])
+// CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0
+// CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 1
+// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP13]], i64 2
+// CHECK-NEXT: store <4 x float> [[VECINIT8_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global float4 *f) {
+ *f = cross(f[0], f[1]);
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"float4*"}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/get_group_id.cl b/libclc/test/get_group_id.cl
deleted file mode 100644
index c2349a0076889..0000000000000
--- a/libclc/test/get_group_id.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(int *i) {
- i[get_group_id(0)] = 1;
-}
diff --git a/libclc/test/integer/add_sat.cl b/libclc/test/integer/add_sat.cl
new file mode 100644
index 0000000000000..ef5bf77b67d21
--- /dev/null
+++ b/libclc/test/integer/add_sat.cl
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(1) [[C]], align 1, !tbaa [[TBAA10]]
+// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]])
+// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global char *a, __global char *b, __global char *c) {
+ *a = add_sat(*b, *c);
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none", !"none"}
+// CHECK: [[META8]] = !{!"char*", !"char*", !"char*"}
+// CHECK: [[META9]] = !{!"", !"", !""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/integer/sub_sat.cl b/libclc/test/integer/sub_sat.cl
new file mode 100644
index 0000000000000..7c3f0a3aa306f
--- /dev/null
+++ b/libclc/test/integer/sub_sat.cl
@@ -0,0 +1,72 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_char(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]])
+// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+__kernel void test_subsat_char(__global char *a, char x, char y) {
+ *a = sub_sat(x, y);
+ return;
+}
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_uchar(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.usub.sat.i8(i8 [[X]], i8 [[Y]])
+// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]]
+// CHECK-NEXT: ret void
+//
+__kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) {
+ *a = sub_sat(x, y);
+ return;
+}
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_long(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]])
+// CHECK-NEXT: store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+__kernel void test_subsat_long(__global long *a, long x, long y) {
+ *a = sub_sat(x, y);
+ return;
+}
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_ulong(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.usub.sat.i64(i64 [[X]], i64 [[Y]])
+// CHECK-NEXT: store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT: ret void
+//
+__kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) {
+ *a = sub_sat(x, y);
+ return;
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 0, i32 0}
+// CHECK: [[META7]] = !{!"none", !"none", !"none"}
+// CHECK: [[META8]] = !{!"char*", !"char", !"char"}
+// CHECK: [[META9]] = !{!"", !"", !""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META13]] = !{!"uchar*", !"uchar", !"uchar"}
+// CHECK: [[META14]] = !{!"long*", !"long", !"long"}
+// CHECK: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0}
+// CHECK: [[META16]] = !{!"long", [[META11]], i64 0}
+// CHECK: [[META17]] = !{!"ulong*", !"ulong", !"ulong"}
+//.
diff --git a/libclc/test/lit.cfg.py b/libclc/test/lit.cfg.py
new file mode 100644
index 0000000000000..03532dad12d2a
--- /dev/null
+++ b/libclc/test/lit.cfg.py
@@ -0,0 +1,44 @@
+import os
+
+import lit.formats
+import lit.util
+
+from lit.llvm import llvm_config
+import site
+
+# Configuration file for the 'lit' test runner.
+
+# name: The name of this test suite.
+config.name = "libclc"
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = [
+ ".cl",
+]
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.join(os.path.dirname(__file__))
+
+# test_exec_root: The root path where tests should be run.
+config.test_exec_root = os.path.join(config.test_run_dir, "test")
+
+llvm_config.use_default_substitutions()
+
+llvm_config.use_clang()
+
+tools = []
+tool_dirs = [config.llvm_tools_dir]
+
+llvm_config.add_tool_substitutions(tools, tool_dirs)
+
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
+# We prefer the lit internal shell which provides a better user experience on failures
+# unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 env var.
+use_lit_shell = True
+lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if lit_shell_env:
+ use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
+
+config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
diff --git a/libclc/test/lit.site.cfg.py.in b/libclc/test/lit.site.cfg.py.in
new file mode 100644
index 0000000000000..a8618cf30c4fc
--- /dev/null
+++ b/libclc/test/lit.site.cfg.py.in
@@ -0,0 +1,23 @@
+ at LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.llvm_src_root = path(r"@LLVM_SOURCE_DIR@")
+config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@")
+config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@"))
+config.llvm_libs_dir = lit_config.substitute(path(r"@LLVM_LIBS_DIR@"))
+config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@"))
+config.lit_tools_dir = path(r"@LLVM_LIT_TOOLS_DIR@")
+config.host_triple = "@LLVM_HOST_TRIPLE@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.host_arch = "@HOST_ARCH@"
+config.python_executable = "@Python3_EXECUTABLE@"
+config.libclc_src_dir = path(r"@LIBCLC_SOURCE_DIR@")
+config.test_run_dir = path(r"@LIBCLC_BINARY_DIR@")
+
+import lit.llvm
+lit.llvm.initialize(lit_config, config)
+
+# Let the main config do the real work.
+lit_config.load_config(
+ config, os.path.join(config.libclc_src_dir, "test/lit.cfg.py"))
diff --git a/libclc/test/math/cos.cl b/libclc/test/math/cos.cl
new file mode 100644
index 0000000000000..79272cce4d54d
--- /dev/null
+++ b/libclc/test/math/cos.cl
@@ -0,0 +1,296 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp olt <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x4160000000000000)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float 0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01))
+// CHECK-NEXT: [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
+// CHECK-NEXT: [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000)
+// CHECK-NEXT: [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I30_I_I_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I31_I_I_I_I]])
+// CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[MUL_I30_I_I_I_I]]
+// CHECK-NEXT: [[SUB2_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[SUB_I_I_I_I]]
+// CHECK-NEXT: [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], [[MUL_I30_I_I_I_I]]
+// CHECK-NEXT: [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]], [[TMP2]]
+// CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd <4 x float> [[SUB_I_I_I_I]], [[SUB4_I_I_I_I]]
+// CHECK-NEXT: [[MUL_I27_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000)
+// CHECK-NEXT: [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I27_I_I_I_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]])
+// CHECK-NEXT: [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[MUL_I27_I_I_I_I]]
+// CHECK-NEXT: [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[SUB5_I_I_I_I]]
+// CHECK-NEXT: [[SUB7_I_I_I_I:%.*]] = fsub <4 x float> [[SUB6_I_I_I_I]], [[MUL_I27_I_I_I_I]]
+// CHECK-NEXT: [[SUB8_I_I_I_I:%.*]] = fsub <4 x float> [[SUB7_I_I_I_I]], [[TMP3]]
+// CHECK-NEXT: [[ADD9_I_I_I_I:%.*]] = fadd <4 x float> [[SUB5_I_I_I_I]], [[SUB8_I_I_I_I]]
+// CHECK-NEXT: [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3CF8469880000000)
+// CHECK-NEXT: [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I_I_I_I_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]])
+// CHECK-NEXT: [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[MUL_I_I_I_I_I]]
+// CHECK-NEXT: [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[SUB10_I_I_I_I]]
+// CHECK-NEXT: [[SUB12_I_I_I_I:%.*]] = fsub <4 x float> [[SUB11_I_I_I_I]], [[MUL_I_I_I_I_I]]
+// CHECK-NEXT: [[ADD13_I_I_I_I:%.*]] = fadd <4 x float> [[SUB10_I_I_I_I]], [[SUB12_I_I_I_I]]
+// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]]
+// CHECK-NEXT: [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[SHR_I_I_I:%.*]] = lshr <4 x i32> [[ASTYPE_I_I_I]], splat (i32 23)
+// CHECK-NEXT: [[AND_I11_I_I:%.*]] = and <4 x i32> [[ASTYPE_I_I_I]], splat (i32 8388607)
+// CHECK-NEXT: [[OR_I_I_I:%.*]] = or disjoint <4 x i32> [[AND_I11_I_I]], splat (i32 8388608)
+// CHECK-NEXT: [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -28220501)
+// CHECK-NEXT: [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] to <4 x i64>
+// CHECK-NEXT: [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4266746795)
+// CHECK-NEXT: [[SHR_I29_I_I:%.*]] = lshr <4 x i64> [[MUL_I28_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I29_I_I]] to <4 x i32>
+// CHECK-NEXT: [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1011060801)
+// CHECK-NEXT: [[ADD_I_I_I:%.*]] = add <4 x i32> [[MUL2_I_I_I]], [[CONV_I2_I30_I_I]]
+// CHECK-NEXT: [[MUL_I24_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1011060801)
+// CHECK-NEXT: [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I25_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD_I_I_I]], [[CONV_I2_I30_I_I]]
+// CHECK-NEXT: [[SEXT_I_I1_I:%.*]] = zext <4 x i1> [[CMP_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD5_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT_I_I1_I]], [[CONV_I2_I26_I_I]]
+// CHECK-NEXT: [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -614296167)
+// CHECK-NEXT: [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]], [[MUL6_I_I_I]]
+// CHECK-NEXT: [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 3680671129)
+// CHECK-NEXT: [[SHR_I21_I_I:%.*]] = lshr <4 x i64> [[MUL_I20_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I21_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]], [[ADD5_I_I_I]]
+// CHECK-NEXT: [[SEXT10_I_I_I:%.*]] = zext <4 x i1> [[CMP9_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD13_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]]
+// CHECK-NEXT: [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -181084736)
+// CHECK-NEXT: [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]], [[MUL14_I_I_I]]
+// CHECK-NEXT: [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4113882560)
+// CHECK-NEXT: [[SHR_I17_I_I:%.*]] = lshr <4 x i64> [[MUL_I16_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I17_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]], [[ADD13_I_I_I]]
+// CHECK-NEXT: [[SEXT18_I_I_I:%.*]] = zext <4 x i1> [[CMP17_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD21_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]]
+// CHECK-NEXT: [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -64530479)
+// CHECK-NEXT: [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]], [[MUL22_I_I_I]]
+// CHECK-NEXT: [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4230436817)
+// CHECK-NEXT: [[SHR_I13_I_I:%.*]] = lshr <4 x i64> [[MUL_I12_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I13_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]], [[ADD21_I_I_I]]
+// CHECK-NEXT: [[SEXT26_I_I_I:%.*]] = zext <4 x i1> [[CMP25_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD29_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]]
+// CHECK-NEXT: [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1313084713)
+// CHECK-NEXT: [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]], [[MUL30_I_I_I]]
+// CHECK-NEXT: [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1313084713)
+// CHECK-NEXT: [[SHR_I9_I_I:%.*]] = lshr <4 x i64> [[MUL_I8_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I9_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]], [[ADD29_I_I_I]]
+// CHECK-NEXT: [[SEXT34_I_I_I:%.*]] = zext <4 x i1> [[CMP33_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD37_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]]
+// CHECK-NEXT: [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -1560706194)
+// CHECK-NEXT: [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]], [[MUL38_I_I_I]]
+// CHECK-NEXT: [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 2734261102)
+// CHECK-NEXT: [[SHR_I6_I_I:%.*]] = lshr <4 x i64> [[MUL_I5_I_I]], splat (i64 32)
+// CHECK-NEXT: [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I6_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]], [[ADD37_I_I_I]]
+// CHECK-NEXT: [[SEXT42_I_I_I:%.*]] = zext <4 x i1> [[CMP41_I_I_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD45_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT42_I_I_I]], [[CONV_I2_I_I_I]]
+// CHECK-NEXT: [[SUB47_I_I_I:%.*]] = add nsw <4 x i32> [[SHR_I_I_I]], splat (i32 -120)
+// CHECK-NEXT: [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]]
+// CHECK-NEXT: [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]]
+// CHECK-NEXT: [[COND55_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD23_I_I_I]], <4 x i32> [[ADD31_I_I_I]]
+// CHECK-NEXT: [[COND57_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]]
+// CHECK-NEXT: [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]]
+// CHECK-NEXT: [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]]
+// CHECK-NEXT: [[COND63_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[MUL_I_I_I]], <4 x i32> [[ADD_I_I_I]]
+// CHECK-NEXT: [[DOTNEG_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]], [[SUB47_I_I_I]]
+// CHECK-NEXT: [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB66_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[COND70_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND53_I_I_I]], <4 x i32> [[COND51_I_I_I]]
+// CHECK-NEXT: [[COND72_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]]
+// CHECK-NEXT: [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]]
+// CHECK-NEXT: [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]]
+// CHECK-NEXT: [[COND78_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND61_I_I_I]], <4 x i32> [[COND59_I_I_I]]
+// CHECK-NEXT: [[COND80_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]]
+// CHECK-NEXT: [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], [[SUB66_I_I_I]]
+// CHECK-NEXT: [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[COND87_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]]
+// CHECK-NEXT: [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]]
+// CHECK-NEXT: [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]]
+// CHECK-NEXT: [[COND93_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND78_I_I_I]], <4 x i32> [[COND76_I_I_I]]
+// CHECK-NEXT: [[COND95_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]]
+// CHECK-NEXT: [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], [[SUB83_I_I_I]]
+// CHECK-NEXT: [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[COND102_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]]
+// CHECK-NEXT: [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]]
+// CHECK-NEXT: [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]]
+// CHECK-NEXT: [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]]
+// CHECK-NEXT: [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB111_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SUB98_I_I_I]]
+// CHECK-NEXT: [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> [[DOTNEG381_I_I_I]], [[SUB111_I_I_I]]
+// CHECK-NEXT: [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32> splat (i32 24), [[SHR_I_I_I]]
+// CHECK-NEXT: [[SHL_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB47_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[SHL_I_I_I:%.*]] = shl <4 x i32> [[COND102_I_I_I]], [[SHL_MASK_I_I_I]]
+// CHECK-NEXT: [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]], [[SHR_MASK_I_I_I]]
+// CHECK-NEXT: [[OR117_I_I_I:%.*]] = or <4 x i32> [[SHL_I_I_I]], [[SHR116_I_I_I]]
+// CHECK-NEXT: [[SHL120_I_I_I:%.*]] = shl <4 x i32> [[COND104_I_I_I]], [[SHL_MASK_I_I_I]]
+// CHECK-NEXT: [[SHR122_I_I_I:%.*]] = lshr <4 x i32> [[COND106_I_I_I]], [[SHR_MASK_I_I_I]]
+// CHECK-NEXT: [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], [[SHR122_I_I_I]]
+// CHECK-NEXT: [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]], [[SHL_MASK_I_I_I]]
+// CHECK-NEXT: [[SHR128_I_I_I:%.*]] = lshr <4 x i32> [[COND108_I_I_I]], [[SHR_MASK_I_I_I]]
+// CHECK-NEXT: [[OR129_I_I_I:%.*]] = or <4 x i32> [[SHL126_I_I_I]], [[SHR128_I_I_I]]
+// CHECK-NEXT: [[COND131_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]]
+// CHECK-NEXT: [[COND133_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]]
+// CHECK-NEXT: [[COND135_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]]
+// CHECK-NEXT: [[SHR136_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 29)
+// CHECK-NEXT: [[OR139_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x i32> splat (i32 2))
+// CHECK-NEXT: [[OR142_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x i32> splat (i32 2))
+// CHECK-NEXT: [[OR145_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4 x i32> splat (i32 2))
+// CHECK-NEXT: [[AND146_I_I_I:%.*]] = and <4 x i32> [[SHR136_I_I_I]], splat (i32 1)
+// CHECK-NEXT: [[SEXT148_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[AND146_I_I_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[SEXT148_I_I_I]], splat (i32 -2147483648)
+// CHECK-NEXT: [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], [[SEXT148_I_I_I]]
+// CHECK-NEXT: [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]], [[SEXT148_I_I_I]]
+// CHECK-NEXT: [[XOR157_I_I_I:%.*]] = xor <4 x i32> [[OR145_I_I_I]], [[SEXT148_I_I_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 0
+// CHECK-NEXT: [[TMP7:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP6]], i1 false)
+// CHECK-NEXT: [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
+// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 1
+// CHECK-NEXT: [[TMP9:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP8]], i1 false)
+// CHECK-NEXT: [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I1_I_I]], i32 [[TMP9]], i64 1
+// CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 2
+// CHECK-NEXT: [[TMP11:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP10]], i1 false)
+// CHECK-NEXT: [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I2_I_I]], i32 [[TMP11]], i64 2
+// CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 3
+// CHECK-NEXT: [[TMP13:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP12]], i1 false)
+// CHECK-NEXT: [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I3_I_I]], i32 [[TMP13]], i64 3
+// CHECK-NEXT: [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 1)
+// CHECK-NEXT: [[SHL_MASK162_I_I_I:%.*]] = and <4 x i32> [[ADD159_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], [[SHL_MASK162_I_I_I]]
+// CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 31)
+// CHECK-NEXT: [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 31)
+// CHECK-NEXT: [[SHR165_I_I_I:%.*]] = lshr <4 x i32> [[XOR156_I_I_I]], [[SHR_MASK164_I_I_I]]
+// CHECK-NEXT: [[OR166_I_I_I:%.*]] = or <4 x i32> [[SHL163_I_I_I]], [[SHR165_I_I_I]]
+// CHECK-NEXT: [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], [[SHL_MASK162_I_I_I]]
+// CHECK-NEXT: [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]], [[SHR_MASK164_I_I_I]]
+// CHECK-NEXT: [[OR172_I_I_I:%.*]] = or <4 x i32> [[SHL169_I_I_I]], [[SHR171_I_I_I]]
+// CHECK-NEXT: [[SHR176_I_I_I:%.*]] = lshr <4 x i32> [[OR166_I_I_I]], splat (i32 9)
+// CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 23)
+// CHECK-NEXT: [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], [[TMP15]]
+// CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 1056964608)
+// CHECK-NEXT: [[OR177_I_I_I:%.*]] = or <4 x i32> [[TMP16]], [[TMP5]]
+// CHECK-NEXT: [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] to <4 x float>
+// CHECK-NEXT: [[OR181_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x i32> splat (i32 23))
+// CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 0
+// CHECK-NEXT: [[TMP18:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP17]], i1 false)
+// CHECK-NEXT: [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
+// CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 1
+// CHECK-NEXT: [[TMP20:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP19]], i1 false)
+// CHECK-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I_I_I]], i32 [[TMP20]], i64 1
+// CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 2
+// CHECK-NEXT: [[TMP22:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP21]], i1 false)
+// CHECK-NEXT: [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2
+// CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 3
+// CHECK-NEXT: [[TMP24:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP23]], i1 false)
+// CHECK-NEXT: [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3
+// CHECK-NEXT: [[ADD183_I_I_NEG_I:%.*]] = xor <4 x i32> [[VECINIT6_I_I_I]], splat (i32 -1)
+// CHECK-NEXT: [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I_I_I]], splat (i32 1)
+// CHECK-NEXT: [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[SHL187_I_I_I:%.*]] = shl <4 x i32> [[OR181_I_I_I]], [[SHL_MASK186_I_I_I]]
+// CHECK-NEXT: [[TMP25:%.*]] = and <4 x i32> [[VECINIT6_I_I_I]], splat (i32 31)
+// CHECK-NEXT: [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat (i32 31)
+// CHECK-NEXT: [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]], [[SHR_MASK189_I_I_I]]
+// CHECK-NEXT: [[OR191_I_I_I:%.*]] = or <4 x i32> [[SHL187_I_I_I]], [[SHR190_I_I_I]]
+// CHECK-NEXT: [[REASS_SUB10_I:%.*]] = sub nuw nsw <4 x i32> [[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]]
+// CHECK-NEXT: [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], splat (i32 23)
+// CHECK-NEXT: [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat (i32 9)
+// CHECK-NEXT: [[REASS_SUB16_I_I:%.*]] = add <4 x i32> [[ADD193_I_I_NEG_I]], splat (i32 864026624)
+// CHECK-NEXT: [[TMP26:%.*]] = or disjoint <4 x i32> [[SHR197_I_I_I]], [[REASS_SUB16_I_I]]
+// CHECK-NEXT: [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]]
+// CHECK-NEXT: [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] to <4 x float>
+// CHECK-NEXT: [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]], splat (float 0x3FF921FB40000000)
+// CHECK-NEXT: [[FNEG_I_I_I:%.*]] = fneg <4 x float> [[MUL200_I_I_I]]
+// CHECK-NEXT: [[TMP27:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]])
+// CHECK-NEXT: [[TMP28:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[TMP27]])
+// CHECK-NEXT: [[TMP29:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[TMP28]])
+// CHECK-NEXT: [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], [[TMP29]]
+// CHECK-NEXT: [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]], [[MUL200_I_I_I]]
+// CHECK-NEXT: [[SUB206_I_I_I:%.*]] = fsub <4 x float> [[TMP29]], [[SUB205_I_I_I]]
+// CHECK-NEXT: [[SHR207_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 30)
+// CHECK-NEXT: [[ADD209_I_I_I:%.*]] = add nuw nsw <4 x i32> [[AND146_I_I_I]], [[SHR207_I_I_I]]
+// CHECK-NEXT: [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]]
+// CHECK-NEXT: [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]]
+// CHECK-NEXT: [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[CONV_I_I_I]], <4 x i32> [[ADD209_I_I_I]]
+// CHECK-NEXT: [[COND6_I_I:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 2)
+// CHECK-NEXT: [[MUL_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[COND_V_I2_I]]
+// CHECK-NEXT: [[MUL1_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[MUL_I_I]]
+// CHECK-NEXT: [[TMP30:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0x3DE5D93A60000000), <4 x float> splat (float 0xBE5AE5E680000000))
+// CHECK-NEXT: [[TMP31:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4 x float> splat (float 0x3EC6DBE4A0000000))
+// CHECK-NEXT: [[TMP32:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP31]], <4 x float> splat (float 0xBF2A013A80000000))
+// CHECK-NEXT: [[TMP33:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> splat (float 0x3F811110E0000000))
+// CHECK-NEXT: [[FNEG_I3_I:%.*]] = fneg <4 x float> [[MUL1_I_I]]
+// CHECK-NEXT: [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]], [[FNEG_I3_I]]
+// CHECK-NEXT: [[TMP34:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[MUL5_I_I]])
+// CHECK-NEXT: [[FNEG7_I_I:%.*]] = fneg <4 x float> [[COND4_V_I_I]]
+// CHECK-NEXT: [[TMP35:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP34]], <4 x float> [[FNEG7_I_I]])
+// CHECK-NEXT: [[TMP36:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x float> splat (float 0x3FC5555560000000), <4 x float> [[TMP35]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]]
+// CHECK-NEXT: [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]]
+// CHECK-NEXT: [[TMP37:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0xBDA8FAE9C0000000), <4 x float> splat (float 0x3E21EE9EC0000000))
+// CHECK-NEXT: [[TMP38:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4 x float> splat (float 0xBE92524740000000))
+// CHECK-NEXT: [[TMP39:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP38]], <4 x float> splat (float 0x3EFA015C40000000))
+// CHECK-NEXT: [[TMP40:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> splat (float 0xBF56C16C00000000))
+// CHECK-NEXT: [[TMP41:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4 x float> splat (float 0x3FA5555560000000))
+// CHECK-NEXT: [[MUL5_I5_I:%.*]] = fmul <4 x float> [[MUL_I_I]], [[TMP41]]
+// CHECK-NEXT: [[TMP42:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[COND_V_I2_I]])
+// CHECK-NEXT: [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -16777216)
+// CHECK-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -1050253722)
+// CHECK-NEXT: [[AND938_I_I:%.*]] = icmp ult <4 x i32> [[TMP43]], splat (i32 11429479)
+// CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[AND938_I_I]], <4 x i32> [[SUB_I6_I]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]], splat (i32 1061683200)
+// CHECK-NEXT: [[COND14_I_I:%.*]] = select <4 x i1> [[CMP11_I_I]], <4 x i32> splat (i32 1049624576), <4 x i32> [[TMP44]]
+// CHECK-NEXT: [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x float>
+// CHECK-NEXT: [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]]
+// CHECK-NEXT: [[TMP46:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[FNEG_I7_I]])
+// CHECK-NEXT: [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 1.000000e+00), [[TMP45]]
+// CHECK-NEXT: [[FNEG17_I_I:%.*]] = fneg <4 x float> [[COND_V_I2_I]]
+// CHECK-NEXT: [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]], [[FNEG17_I_I]]
+// CHECK-NEXT: [[TMP47:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x float> [[MUL18_I_I]])
+// CHECK-NEXT: [[TMP48:%.*]] = fsub <4 x float> [[TMP47]], [[TMP46]]
+// CHECK-NEXT: [[SUB21_I_I:%.*]] = fadd <4 x float> [[SUB16_I_I]], [[TMP48]]
+// CHECK-NEXT: [[TMP49:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 1)
+// CHECK-NEXT: [[TMP50:%.*]] = icmp eq <4 x i32> [[TMP49]], zeroinitializer
+// CHECK-NEXT: [[COND_V_I_I:%.*]] = select <4 x i1> [[TMP50]], <4 x float> [[SUB21_I_I]], <4 x float> [[FNEG_I_I]]
+// CHECK-NEXT: [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 x i32>
+// CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]], zeroinitializer
+// CHECK-NEXT: [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]] to <4 x i32>
+// CHECK-NEXT: [[SHL_I_I:%.*]] = shl nsw <4 x i32> [[SEXT6_I_I]], splat (i32 31)
+// CHECK-NEXT: [[XOR_I_I:%.*]] = xor <4 x i32> [[SHL_I_I]], [[COND_I_I]]
+// CHECK-NEXT: [[ASTYPE7_I_I:%.*]] = bitcast <4 x i32> [[XOR_I_I]] to <4 x float>
+// CHECK-NEXT: [[TMP51:%.*]] = fcmp ueq <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x7FF0000000000000)
+// CHECK-NEXT: [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x float> splat (float 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]]
+// CHECK-NEXT: store <4 x float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global float4 *f) {
+ *f = cos(*f);
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"float4*"}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/math/fabs.cl b/libclc/test/math/fabs.cl
new file mode 100644
index 0000000000000..5935fc9f62627
--- /dev/null
+++ b/libclc/test/math/fabs.cl
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT: [[ELT_ABS_I_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[TMP0]])
+// CHECK-NEXT: store float [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global float *f) {
+ *f = fabs(*f);
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"float*"}
+// CHECK: [[META9]] = !{!""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"float", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/math/rsqrt.cl b/libclc/test/math/rsqrt.cl
new file mode 100644
index 0000000000000..fa5e13f2705f2
--- /dev/null
+++ b/libclc/test/math/rsqrt.cl
@@ -0,0 +1,48 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#if defined(cl_khr_fp64)
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) initializes((16, 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 captures(none) initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]), !fpmath [[META14:![0-9]+]]
+// CHECK-NEXT: [[DIV_I_I:%.*]] = fdiv contract <4 x float> splat (float 1.000000e+00), [[TMP1]], !fpmath [[META15:![0-9]+]]
+// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[X]], i64 16
+// CHECK-NEXT: store <4 x float> [[DIV_I_I]], ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1) [[Y]], align 32, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call contract <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]])
+// CHECK-NEXT: [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 1.000000e+00), [[TMP3]]
+// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[Y]], i64 32
+// CHECK-NEXT: store <4 x double> [[DIV_I_I1]], ptr addrspace(1) [[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global float4 *x, __global double4 *y) {
+ x[1] = rsqrt(x[0]);
+ y[1] = rsqrt(y[0]);
+}
+
+#endif
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none"}
+// CHECK: [[META8]] = !{!"float4*", !"double4*"}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", !"double __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!"", !""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META14]] = !{float 3.000000e+00}
+// CHECK: [[META15]] = !{float 2.500000e+00}
+//.
diff --git a/libclc/test/misc/as_type.cl b/libclc/test/misc/as_type.cl
new file mode 100644
index 0000000000000..a475956e62d0a
--- /dev/null
+++ b/libclc/test/misc/as_type.cl
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global int4 *x, __global float4 *y) {
+ *x = as_int4(*y);
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none"}
+// CHECK: [[META8]] = !{!"int4*", !"float4*"}
+// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!"", !""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/misc/convert.cl b/libclc/test/misc/convert.cl
new file mode 100644
index 0000000000000..cd8c41465ed38
--- /dev/null
+++ b/libclc/test/misc/convert.cl
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global int4 *x, __global float4 *y) {
+ *x = convert_int4(*y);
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none"}
+// CHECK: [[META8]] = !{!"int4*", !"float4*"}
+// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!"", !""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/rsqrt.cl b/libclc/test/rsqrt.cl
deleted file mode 100644
index 4eebfe8ecf7f9..0000000000000
--- a/libclc/test/rsqrt.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-__kernel void foo(float4 *x, double4 *y) {
- x[1] = rsqrt(x[0]);
- y[1] = rsqrt(y[0]);
-}
diff --git a/libclc/test/subsat.cl b/libclc/test/subsat.cl
deleted file mode 100644
index 5e6fbdcfbef9e..0000000000000
--- a/libclc/test/subsat.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void test_subsat_char(char *a, char x, char y) {
- *a = sub_sat(x, y);
- return;
-}
-
-__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) {
- *a = sub_sat(x, y);
- return;
-}
-
-__kernel void test_subsat_long(long *a, long x, long y) {
- *a = sub_sat(x, y);
- return;
-}
-
-__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) {
- *a = sub_sat(x, y);
- return;
-}
\ No newline at end of file
diff --git a/libclc/test/work-item/get_group_id.cl b/libclc/test/work-item/get_group_id.cl
new file mode 100644
index 0000000000000..f73f8f76cd519
--- /dev/null
+++ b/libclc/test/work-item/get_group_id.cl
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x()
+// CHECK-NEXT: [[RETVAL_0_I:%.*]] = zext i32 [[TMP0]] to i64
+// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(1) [[I]], i64 [[RETVAL_0_I]]
+// CHECK-NEXT: store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+__kernel void foo(__global int *i) {
+ i[get_group_id(0)] = 1;
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"int*"}
+// CHECK: [[META9]] = !{!""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"int", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
>From e79ba7281bd21e8a145d325a706db8e56d7468dc Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 7 Jul 2025 13:01:49 +0100
Subject: [PATCH 09/10] fix CI?
---
.ci/compute_projects.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/.ci/compute_projects.py b/.ci/compute_projects.py
index c3cf714ce6c10..4c268b7221663 100644
--- a/.ci/compute_projects.py
+++ b/.ci/compute_projects.py
@@ -46,6 +46,7 @@
"mlir",
"polly",
"flang",
+ "libclc",
},
"lld": {"bolt", "cross-project-tests"},
# TODO(issues/132795): LLDB should be enabled on clang changes.
@@ -75,7 +76,7 @@
# This mapping describes runtimes that should be tested when the key project is
# touched.
DEPENDENT_RUNTIMES_TO_TEST = {
- "clang": {"compiler-rt"},
+ "clang": {"compiler-rt", "libclc"},
"clang-tools-extra": {"libc"},
"libc": {"libc"},
".ci": {"compiler-rt", "libc"},
@@ -132,6 +133,7 @@
"lld": "check-lld",
"flang": "check-flang",
"libc": "check-libc",
+ "libclc": "check-libclc",
"lld": "check-lld",
"lldb": "check-lldb",
"mlir": "check-mlir",
@@ -139,7 +141,7 @@
"polly": "check-polly",
}
-RUNTIMES = {"libcxx", "libcxxabi", "libunwind", "compiler-rt", "libc"}
+RUNTIMES = {"libcxx", "libcxxabi", "libunwind", "compiler-rt", "libc", "libclc"}
def _add_dependencies(projects: Set[str], runtimes: Set[str]) -> Set[str]:
>From a804b092dd00d5f40883350b2ad486a38146981f Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 7 Jul 2025 13:08:37 +0100
Subject: [PATCH 10/10] fix ci
---
.ci/compute_projects.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/.ci/compute_projects.py b/.ci/compute_projects.py
index 4c268b7221663..e51697653b5f3 100644
--- a/.ci/compute_projects.py
+++ b/.ci/compute_projects.py
@@ -46,7 +46,6 @@
"mlir",
"polly",
"flang",
- "libclc",
},
"lld": {"bolt", "cross-project-tests"},
# TODO(issues/132795): LLDB should be enabled on clang changes.
@@ -63,7 +62,6 @@
"mlir",
"polly",
"flang",
- "libclc",
"openmp",
},
}
@@ -114,6 +112,7 @@
"cross-project-tests",
"flang",
"libc",
+ "libclc",
"lldb",
"openmp",
"polly",
More information about the llvm-commits
mailing list