https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/87989

>From b41b2032fdb01bd91d32255bf22a94315b58a017 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 30 Jun 2025 10:59:02 +0100
Subject: [PATCH 1/9] [libclc] Place libclc files in clang's resource dir

---
 libclc/CMakeLists.txt | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index e2871d1b01a16..8bc3a75739fcd 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -63,6 +63,9 @@ if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL 
CMAKE_CURRENT_SOURCE_DI
       set( ${tool}_target )
     endforeach()
   endif()
+
+  # Setup the paths where libclc runtimes should be stored.
+  set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
 else()
   # In-tree configuration
   set( LIBCLC_STANDALONE_BUILD FALSE )
@@ -82,10 +85,14 @@ else()
     get_host_tool_path( llvm-link LLVM_LINK llvm-link_exe llvm-link_target )
     get_host_tool_path( opt OPT opt_exe opt_target )
   endif()
-endif()
 
-# Setup the paths where libclc runtimes should be stored.
-set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
+  # Setup the paths where libclc runtimes should be stored. By default, in an
+  # in-tree build we place the libraries in clang's resource driectory.
+  get_clang_resource_dir( LIBCLC_OUTPUT_DIR PREFIX 
${LLVM_LIBRARY_OUTPUT_INTDIR}/.. )
+
+  # Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR.
+  set( LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_OUTPUT_DIR}/lib/libclc )
+endif()
 
 if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} )
   message( WARNING "Using custom LLVM tools to build libclc: "

>From 4057b6af7db11b59878739bff4d826a9fc616bbf Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Tue, 1 Jul 2025 10:56:54 +0100
Subject: [PATCH 2/9] [clang] Add the ability to link libclc OpenCL libraries

This commit adds driver support for linking libclc OpenCL libraries. It
takes the form of a new optional flag: --libclc-lib=namespec. Nothing is
linked unless this flag is specified.

Not all libclc targets have corresponding clang targets. For this reason
it is desirable for users to be able to specify a libclc library name.
We support this by taking both a library name (without the .bc suffix)
or a filename. Both of these are searched for in the clang resource
directory or in the LIBRARY_PATH environment variable. Filenames are
also checked themselves so that absolute paths can be provided. The
syntax for specifying filenames (as opposed to library names) uses a
leading colon (:), inspired by the -l option.

To accommodate this option, libclc libraries are now placed into clang's
resource directory in an in-tree configuration. The aliases are not
currently placed there to avoid polluting the directory, but that can be
changed. The libraries are all placed in <resource-dir>/lib/libclc and
are not grouped under host-specific directories as some other runtime
libraries are; it is not expected that OpenCL libraries will differ
depending on the host toolchain.

Currently only the AMDGPU toolchain supports this option as a proof of
concept. Other targets such as NVPTX or SPIR/SPIR-V could support it
too. We could optionally let target toolchains search for libclc
libraries themselves, possibly when passed an empty --libclc-lib.
---
 .../clang/Basic/DiagnosticDriverKinds.td      |  3 +
 clang/include/clang/Driver/CommonArgs.h       |  3 +
 clang/include/clang/Driver/Options.td         |  2 +
 clang/lib/Driver/ToolChains/AMDGPU.cpp        |  2 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp    | 59 +++++++++++++++++++
 clang/test/Driver/Inputs/libclc/libclc.bc     |  0
 .../Driver/Inputs/libclc/subdir/libclc.bc     |  0
 clang/test/Driver/opencl-libclc.cl            | 10 ++++
 8 files changed, 79 insertions(+)
 create mode 100644 clang/test/Driver/Inputs/libclc/libclc.bc
 create mode 100644 clang/test/Driver/Inputs/libclc/subdir/libclc.bc
 create mode 100644 clang/test/Driver/opencl-libclc.cl

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 34b6c0d7a8acd..019161c22a24f 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -392,6 +392,9 @@ def warn_drv_fraw_string_literals_in_cxx11 : Warning<
   "ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C 
and C++ standards before C++11">,
   InGroup<UnusedCommandLineArgument>;
 
+def err_drv_libclc_not_found : Error<
+  "no libclc library '%0' found in the clang resource directory or in 
LIBRARY_PATH">;
+
 def err_drv_invalid_malign_branch_EQ : Error<
   "invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
 
diff --git a/clang/include/clang/Driver/CommonArgs.h 
b/clang/include/clang/Driver/CommonArgs.h
index 26aa3ccf84786..7e8ab82eb7863 100644
--- a/clang/include/clang/Driver/CommonArgs.h
+++ b/clang/include/clang/Driver/CommonArgs.h
@@ -215,6 +215,9 @@ void addOpenMPDeviceRTL(const Driver &D, const 
llvm::opt::ArgList &DriverArgs,
                         StringRef BitcodeSuffix, const llvm::Triple &Triple,
                         const ToolChain &HostTC);
 
+void addOpenCLBuiltinsLib(const Driver &D, const llvm::opt::ArgList 
&DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args);
+
 void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
                            const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs,
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 54c71b066f9d4..cf24bcac9c07c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1425,6 +1425,8 @@ def openacc_macro_override_EQ
 
 // End Clang specific/exclusive options for OpenACC.
 
+def libclc_lib_EQ : Joined<["--"], "libclc-lib=">, Group<opencl_Group>,
+  HelpText<"Namespec of libclc OpenCL bitcode library to link">;
 def libomptarget_amdgpu_bc_path_EQ : Joined<["--"], 
"libomptarget-amdgpu-bc-path=">, Group<i_Group>,
   HelpText<"Path to libomptarget-amdgcn bitcode library">;
 def libomptarget_amdgcn_bc_path_EQ : Joined<["--"], 
"libomptarget-amdgcn-bc-path=">, Group<i_Group>,
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index b7564a0495da8..e6d1baa2a1caa 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -795,6 +795,8 @@ void AMDGPUToolChain::addClangTargetOptions(
     CC1Args.push_back("-fvisibility=hidden");
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
+
+  addOpenCLBuiltinsLib(getDriver(), DriverArgs, CC1Args);
 }
 
 void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index bdd77ac84913c..40aaa7ca324be 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2942,6 +2942,65 @@ void tools::addHIPRuntimeLibArgs(const ToolChain &TC, 
Compilation &C,
   }
 }
 
+void tools::addOpenCLBuiltinsLib(const Driver &D,
+                                 const llvm::opt::ArgList &DriverArgs,
+                                 llvm::opt::ArgStringList &CC1Args) {
+  // Check whether user specifies a libclc bytecode library
+  if (const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ)) {
+    SmallVector<StringRef, 8> LibraryPaths;
+
+    // Add user defined library paths from LIBRARY_PATH.
+    std::optional<std::string> LibPath =
+        llvm::sys::Process::GetEnv("LIBRARY_PATH");
+    if (LibPath) {
+      SmallVector<StringRef, 8> Frags;
+      const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
+      llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
+      for (StringRef Path : Frags)
+        LibraryPaths.emplace_back(Path.trim());
+    }
+
+    // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/
+    SmallString<128> LibclcPath(D.ResourceDir);
+    llvm::sys::path::append(LibclcPath, "lib", "libclc");
+    LibraryPaths.emplace_back(LibclcPath);
+
+    bool FoundBCLibrary = false;
+    StringRef LibclcNamespec(A->getValue());
+
+    // If the namespec is of the form :filename, search for that file.
+    bool FilenameSearch = LibclcNamespec.starts_with(":");
+    SmallString<128> LibclcTargetFile(
+        LibclcNamespec.drop_front(FilenameSearch ? 1 : 0));
+
+    if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
+      FoundBCLibrary = true;
+      CC1Args.push_back("-mlink-builtin-bitcode");
+      CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile));
+    } else {
+      // Search the library paths for the file
+      if (!FilenameSearch)
+        LibclcTargetFile += ".bc";
+
+      for (StringRef LibraryPath : LibraryPaths) {
+        SmallString<128> LibclcPath(LibraryPath);
+        llvm::sys::path::append(LibclcPath, LibclcTargetFile);
+        if (llvm::sys::fs::exists(LibclcPath)) {
+          FoundBCLibrary = true;
+          CC1Args.push_back("-mlink-builtin-bitcode");
+          CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath));
+          break;
+        }
+      }
+    }
+
+    // Since the user requested a library, if we haven't one then report an
+    // error.
+    if (!FoundBCLibrary)
+      D.Diag(diag::err_drv_libclc_not_found) << LibclcTargetFile;
+  }
+}
+
 void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
                                   const llvm::opt::ArgList &Args,
                                   llvm::opt::ArgStringList &CmdArgs,
diff --git a/clang/test/Driver/Inputs/libclc/libclc.bc 
b/clang/test/Driver/Inputs/libclc/libclc.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/libclc/subdir/libclc.bc 
b/clang/test/Driver/Inputs/libclc/subdir/libclc.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/opencl-libclc.cl 
b/clang/test/Driver/opencl-libclc.cl
new file mode 100644
index 0000000000000..34b863b128682
--- /dev/null
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -0,0 +1,10 @@
+// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
+
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### 
-target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### 
-target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
+
+// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc
+// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc

>From bde5f5325e17525820bc145abb1f4c43e358d59b Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Tue, 1 Jul 2025 12:26:22 +0100
Subject: [PATCH 3/9] adjust LIBRARY_PATH usage in tests

---
 clang/test/Driver/opencl-libclc.cl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/test/Driver/opencl-libclc.cl 
b/clang/test/Driver/opencl-libclc.cl
index 34b863b128682..8608a4f741b23 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -1,10 +1,10 @@
 // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
 
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### 
-target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### 
-target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s 
--check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck 
%s --check-prefix CHECK-SUBDIR
+// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
 
 // CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc
 // CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc

>From 5496f2dd3b449895f8de880f937e6fe3c8f29cd8 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Tue, 1 Jul 2025 12:50:15 +0100
Subject: [PATCH 4/9] fix path checks

---
 clang/test/Driver/opencl-libclc.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/opencl-libclc.cl 
b/clang/test/Driver/opencl-libclc.cl
index 8608a4f741b23..b7780d1af8d40 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -6,5 +6,5 @@
 // RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck 
%s --check-prefix CHECK-SUBDIR
 // RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
 
-// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc
-// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc
+// CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc
+// CHECK-SUBDIR: 
-mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc

>From df02a9e5a46206d5ed4880c293fbd0faf6728db2 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 7 Jul 2025 12:06:35 +0100
Subject: [PATCH 5/9] consume_front

---
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 40aaa7ca324be..e6d9974d734bf 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2969,9 +2969,8 @@ void tools::addOpenCLBuiltinsLib(const Driver &D,
     StringRef LibclcNamespec(A->getValue());
 
     // If the namespec is of the form :filename, search for that file.
-    bool FilenameSearch = LibclcNamespec.starts_with(":");
-    SmallString<128> LibclcTargetFile(
-        LibclcNamespec.drop_front(FilenameSearch ? 1 : 0));
+    bool FilenameSearch = LibclcNamespec.consume_front(":");
+    SmallString<128> LibclcTargetFile(LibclcNamespec);
 
     if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
       FoundBCLibrary = true;

>From 00b32eac8f3a06f9380a2cefeb20bef4e829d0fc Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 7 Jul 2025 12:23:10 +0100
Subject: [PATCH 6/9] remove logic for LIBRARY_PATH

---
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 67 ++++++++--------------
 clang/test/Driver/opencl-libclc.cl         |  7 +--
 2 files changed, 24 insertions(+), 50 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index e6d9974d734bf..36f335154e6bc 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2946,57 +2946,36 @@ void tools::addOpenCLBuiltinsLib(const Driver &D,
                                  const llvm::opt::ArgList &DriverArgs,
                                  llvm::opt::ArgStringList &CC1Args) {
   // Check whether user specifies a libclc bytecode library
-  if (const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ)) {
-    SmallVector<StringRef, 8> LibraryPaths;
+  const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ);
+  if (!A)
+    return;
 
-    // Add user defined library paths from LIBRARY_PATH.
-    std::optional<std::string> LibPath =
-        llvm::sys::Process::GetEnv("LIBRARY_PATH");
-    if (LibPath) {
-      SmallVector<StringRef, 8> Frags;
-      const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
-      llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
-      for (StringRef Path : Frags)
-        LibraryPaths.emplace_back(Path.trim());
-    }
+  // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/
+  SmallString<128> LibclcPath(D.ResourceDir);
+  llvm::sys::path::append(LibclcPath, "lib", "libclc");
 
-    // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/
-    SmallString<128> LibclcPath(D.ResourceDir);
-    llvm::sys::path::append(LibclcPath, "lib", "libclc");
-    LibraryPaths.emplace_back(LibclcPath);
+  // If the namespec is of the form :filename, search for that file.
+  StringRef LibclcNamespec(A->getValue());
+  bool FilenameSearch = LibclcNamespec.consume_front(":");
+  SmallString<128> LibclcTargetFile(LibclcNamespec);
 
-    bool FoundBCLibrary = false;
-    StringRef LibclcNamespec(A->getValue());
-
-    // If the namespec is of the form :filename, search for that file.
-    bool FilenameSearch = LibclcNamespec.consume_front(":");
-    SmallString<128> LibclcTargetFile(LibclcNamespec);
+  if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
+    CC1Args.push_back("-mlink-builtin-bitcode");
+    CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile));
+  } else {
+    // Search the library paths for the file
+    if (!FilenameSearch)
+      LibclcTargetFile += ".bc";
 
-    if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) {
-      FoundBCLibrary = true;
+    llvm::sys::path::append(LibclcPath, LibclcTargetFile);
+    if (llvm::sys::fs::exists(LibclcPath)) {
       CC1Args.push_back("-mlink-builtin-bitcode");
-      CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile));
+      CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath));
     } else {
-      // Search the library paths for the file
-      if (!FilenameSearch)
-        LibclcTargetFile += ".bc";
-
-      for (StringRef LibraryPath : LibraryPaths) {
-        SmallString<128> LibclcPath(LibraryPath);
-        llvm::sys::path::append(LibclcPath, LibclcTargetFile);
-        if (llvm::sys::fs::exists(LibclcPath)) {
-          FoundBCLibrary = true;
-          CC1Args.push_back("-mlink-builtin-bitcode");
-          CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath));
-          break;
-        }
-      }
-    }
-
-    // Since the user requested a library, if we haven't one then report an
-    // error.
-    if (!FoundBCLibrary)
+      // Since the user requested a library, if we haven't one then report an
+      // error.
       D.Diag(diag::err_drv_libclc_not_found) << LibclcTargetFile;
+    }
   }
 }
 
diff --git a/clang/test/Driver/opencl-libclc.cl 
b/clang/test/Driver/opencl-libclc.cl
index b7780d1af8d40..80c5d10550146 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -1,10 +1,5 @@
 // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s
-
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s 
--check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck 
%s --check-prefix CHECK-SUBDIR
-// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target 
amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | 
FileCheck %s --check-prefix CHECK-SUBDIR
+// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/subdir/libclc.bc %s 2>&1 | FileCheck %s 
--check-prefix CHECK-SUBDIR
 
 // CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc
 // CHECK-SUBDIR: 
-mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc

>From 1c16dbbd8225cb481bfd9a00fa61493a03478e3f Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 7 Jul 2025 12:29:04 +0100
Subject: [PATCH 7/9] fix diagnostic; add error test

---
 clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 +--
 clang/test/Driver/opencl-libclc.cl                 | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 019161c22a24f..8d07ade73ec89 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -392,8 +392,7 @@ def warn_drv_fraw_string_literals_in_cxx11 : Warning<
   "ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C 
and C++ standards before C++11">,
   InGroup<UnusedCommandLineArgument>;
 
-def err_drv_libclc_not_found : Error<
-  "no libclc library '%0' found in the clang resource directory or in 
LIBRARY_PATH">;
+def err_drv_libclc_not_found : Error<"no libclc library '%0' found in the 
clang resource directory">;
 
 def err_drv_invalid_malign_branch_EQ : Error<
   "invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
diff --git a/clang/test/Driver/opencl-libclc.cl 
b/clang/test/Driver/opencl-libclc.cl
index 80c5d10550146..185690768c75b 100644
--- a/clang/test/Driver/opencl-libclc.cl
+++ b/clang/test/Driver/opencl-libclc.cl
@@ -1,5 +1,9 @@
 // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s
 // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/subdir/libclc.bc %s 2>&1 | FileCheck %s 
--check-prefix CHECK-SUBDIR
 
+// RUN: not %clang -### -target amdgcn-amd-amdhsa --no-offloadlib 
--libclc-lib=:%S/Inputs/libclc/subdir/not-here.bc %s 2>&1 | FileCheck %s 
--check-prefix CHECK-ERROR
+
 // CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc
 // CHECK-SUBDIR: 
-mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc
+
+// CHECK-ERROR: no libclc library{{.*}}not-here.bc' found in the clang 
resource directory

>From b83c111bba9bc4531a941170018060fdbf7de6d6 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Thu, 4 Apr 2024 17:49:13 +0100
Subject: [PATCH 8/9] [libclc] Add initial LIT tests

These tests aren't very meaningful and aren't immune to false positives,
but they do get the project building when running 'check-all' and so
enable libclc testing in CI.
---
 libclc/CMakeLists.txt                 |   4 +
 libclc/test/CMakeLists.txt            |  35 +++
 libclc/test/add_sat.cl                |  11 -
 libclc/test/as_type.cl                |  11 -
 libclc/test/convert.cl                |  11 -
 libclc/test/cos.cl                    |  11 -
 libclc/test/cross.cl                  |  11 -
 libclc/test/fabs.cl                   |  11 -
 libclc/test/geometric/cross.cl        |  51 +++++
 libclc/test/get_group_id.cl           |  11 -
 libclc/test/integer/add_sat.cl        |  32 +++
 libclc/test/integer/sub_sat.cl        |  72 +++++++
 libclc/test/lit.cfg.py                |  44 ++++
 libclc/test/lit.site.cfg.py.in        |  23 ++
 libclc/test/math/cos.cl               | 296 ++++++++++++++++++++++++++
 libclc/test/math/fabs.cl              |  32 +++
 libclc/test/math/rsqrt.cl             |  48 +++++
 libclc/test/misc/as_type.cl           |  31 +++
 libclc/test/misc/convert.cl           |  32 +++
 libclc/test/rsqrt.cl                  |  14 --
 libclc/test/subsat.cl                 |  27 ---
 libclc/test/work-item/get_group_id.cl |  33 +++
 22 files changed, 733 insertions(+), 118 deletions(-)
 create mode 100644 libclc/test/CMakeLists.txt
 delete mode 100644 libclc/test/add_sat.cl
 delete mode 100644 libclc/test/as_type.cl
 delete mode 100644 libclc/test/convert.cl
 delete mode 100644 libclc/test/cos.cl
 delete mode 100644 libclc/test/cross.cl
 delete mode 100644 libclc/test/fabs.cl
 create mode 100644 libclc/test/geometric/cross.cl
 delete mode 100644 libclc/test/get_group_id.cl
 create mode 100644 libclc/test/integer/add_sat.cl
 create mode 100644 libclc/test/integer/sub_sat.cl
 create mode 100644 libclc/test/lit.cfg.py
 create mode 100644 libclc/test/lit.site.cfg.py.in
 create mode 100644 libclc/test/math/cos.cl
 create mode 100644 libclc/test/math/fabs.cl
 create mode 100644 libclc/test/math/rsqrt.cl
 create mode 100644 libclc/test/misc/as_type.cl
 create mode 100644 libclc/test/misc/convert.cl
 delete mode 100644 libclc/test/rsqrt.cl
 delete mode 100644 libclc/test/subsat.cl
 create mode 100644 libclc/test/work-item/get_group_id.cl

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 8bc3a75739fcd..673309c88e2cd 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -494,3 +494,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
     )
   endforeach( d )
 endforeach( t )
+
+if( NOT LIBCLC_STANDALONE_BUILD )
+  add_subdirectory( test )
+endif()
diff --git a/libclc/test/CMakeLists.txt b/libclc/test/CMakeLists.txt
new file mode 100644
index 0000000000000..653b39c2821a7
--- /dev/null
+++ b/libclc/test/CMakeLists.txt
@@ -0,0 +1,35 @@
+set( LIBCLC_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
+
+set( LIBCLC_TEST_TARGETS_ALL
+  amdgcn-mesa-mesa3d
+)
+
+foreach( target IN LISTS LIBCLC_TEST_TARGETS_ALL )
+  # If we haven't built this libclc target, don't build the tests
+  if( NOT TARGET prepare-${target} )
+    message( WARNING "libclc tests require target ${target}. Tests will not be 
built" )
+    # Add a dummy target
+    add_custom_target( check-libclc )
+    return()
+  endif()
+
+  list( APPEND LIBCLC_TEST_DEPS prepare-${target} )
+endforeach()
+
+list( APPEND LIBCLC_TEST_DEPS
+  ${clang_target}
+  FileCheck
+)
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+)
+
+add_lit_testsuite( check-libclc
+  "Running libclc regression tests"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS ${LIBCLC_TEST_DEPS}
+)
diff --git a/libclc/test/add_sat.cl b/libclc/test/add_sat.cl
deleted file mode 100644
index 87c3d39df3542..0000000000000
--- a/libclc/test/add_sat.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(__global char *a, __global char *b, __global char *c) {
-  *a = add_sat(*b, *c);
-}
diff --git a/libclc/test/as_type.cl b/libclc/test/as_type.cl
deleted file mode 100644
index a926f48c4ea0c..0000000000000
--- a/libclc/test/as_type.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(int4 *x, float4 *y) {
-  *x = as_int4(*y);
-}
diff --git a/libclc/test/convert.cl b/libclc/test/convert.cl
deleted file mode 100644
index 8eba608dc5f8c..0000000000000
--- a/libclc/test/convert.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(int4 *x, float4 *y) {
-  *x = convert_int4(*y);
-}
diff --git a/libclc/test/cos.cl b/libclc/test/cos.cl
deleted file mode 100644
index 92a998b3ba5f7..0000000000000
--- a/libclc/test/cos.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(float4 *f) {
-  *f = cos(*f);
-}
diff --git a/libclc/test/cross.cl b/libclc/test/cross.cl
deleted file mode 100644
index 90762d0d073a6..0000000000000
--- a/libclc/test/cross.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(float4 *f) {
-  *f = cross(f[0], f[1]);
-}
diff --git a/libclc/test/fabs.cl b/libclc/test/fabs.cl
deleted file mode 100644
index 3f5a964e0418a..0000000000000
--- a/libclc/test/fabs.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(float *f) {
-  *f = fabs(*f);
-}
diff --git a/libclc/test/geometric/cross.cl b/libclc/test/geometric/cross.cl
new file mode 100644
index 0000000000000..4cb8c53bea5ee
--- /dev/null
+++ b/libclc/test/geometric/cross.cl
@@ -0,0 +1,51 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] 
!kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] 
!kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual 
[[META10:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], 
align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr 
addrspace(1) [[F]], i64 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr addrspace(1) 
[[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
+// CHECK-NEXT:    [[TMP6:%.*]] = fneg float [[TMP5]]
+// CHECK-NEXT:    [[NEG_I_I:%.*]] = fmul float [[TMP4]], [[TMP6]]
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float 
[[TMP2]], float [[TMP3]], float [[NEG_I_I]])
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP10:%.*]] = fneg float [[TMP3]]
+// CHECK-NEXT:    [[NEG3_I_I:%.*]] = fmul float [[TMP9]], [[TMP10]]
+// CHECK-NEXT:    [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float 
[[TMP4]], float [[TMP8]], float [[NEG3_I_I]])
+// CHECK-NEXT:    [[TMP12:%.*]] = fneg float [[TMP8]]
+// CHECK-NEXT:    [[NEG6_I_I:%.*]] = fmul float [[TMP2]], [[TMP12]]
+// CHECK-NEXT:    [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float 
[[TMP9]], float [[TMP5]], float [[NEG6_I_I]])
+// CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> <float poison, 
float poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0
+// CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float 
[[TMP11]], i64 1
+// CHECK-NEXT:    [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]], 
float [[TMP13]], i64 2
+// CHECK-NEXT:    store <4 x float> [[VECINIT8_I_I]], ptr addrspace(1) [[F]], 
align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global float4 *f) {
+  *f = cross(f[0], f[1]);
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"float4*"}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/get_group_id.cl b/libclc/test/get_group_id.cl
deleted file mode 100644
index c2349a0076889..0000000000000
--- a/libclc/test/get_group_id.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void foo(int *i) {
-  i[get_group_id(0)] = 1;
-}
diff --git a/libclc/test/integer/add_sat.cl b/libclc/test/integer/add_sat.cl
new file mode 100644
index 0000000000000..ef5bf77b67d21
--- /dev/null
+++ b/libclc/test/integer/add_sat.cl
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) 
initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1 
captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1 
captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] 
!kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual 
[[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type 
[[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, 
!tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(1) [[C]], align 1, 
!tbaa [[TBAA10]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8 
@llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]])
+// CHECK-NEXT:    store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, 
!tbaa [[TBAA10]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global char *a, __global char *b, __global char *c) {
+  *a = add_sat(*b, *c);
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none", !"none"}
+// CHECK: [[META8]] = !{!"char*", !"char*", !"char*"}
+// CHECK: [[META9]] = !{!"", !"", !""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/integer/sub_sat.cl b/libclc/test/integer/sub_sat.cl
new file mode 100644
index 0000000000000..7c3f0a3aa306f
--- /dev/null
+++ b/libclc/test/integer/sub_sat.cl
@@ -0,0 +1,72 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_char(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) 
initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] 
!kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] 
!kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8 
@llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]])
+// CHECK-NEXT:    store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, 
!tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_subsat_char(__global char *a, char x, char y) {
+  *a = sub_sat(x, y);
+  return;
+}
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_uchar(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) 
initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] 
!kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]] 
!kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8 
@llvm.usub.sat.i8(i8 [[X]], i8 [[Y]])
+// CHECK-NEXT:    store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, 
!tbaa [[TBAA10]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) {
+  *a = sub_sat(x, y);
+  return;
+}
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_long(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) 
initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] 
!kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]] 
!kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i64 
@llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]])
+// CHECK-NEXT:    store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, 
!tbaa [[TBAA15:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_subsat_long(__global long *a, long x, long y) {
+  *a = sub_sat(x, y);
+  return;
+}
+
+// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_ulong(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) 
initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] 
!kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]] 
!kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i64 
@llvm.usub.sat.i64(i64 [[X]], i64 [[Y]])
+// CHECK-NEXT:    store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, 
!tbaa [[TBAA15]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) {
+  *a = sub_sat(x, y);
+  return;
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 0, i32 0}
+// CHECK: [[META7]] = !{!"none", !"none", !"none"}
+// CHECK: [[META8]] = !{!"char*", !"char", !"char"}
+// CHECK: [[META9]] = !{!"", !"", !""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META13]] = !{!"uchar*", !"uchar", !"uchar"}
+// CHECK: [[META14]] = !{!"long*", !"long", !"long"}
+// CHECK: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0}
+// CHECK: [[META16]] = !{!"long", [[META11]], i64 0}
+// CHECK: [[META17]] = !{!"ulong*", !"ulong", !"ulong"}
+//.
diff --git a/libclc/test/lit.cfg.py b/libclc/test/lit.cfg.py
new file mode 100644
index 0000000000000..03532dad12d2a
--- /dev/null
+++ b/libclc/test/lit.cfg.py
@@ -0,0 +1,44 @@
+import os
+
+import lit.formats
+import lit.util
+
+from lit.llvm import llvm_config
+import site
+
+# Configuration file for the 'lit' test runner.
+
+# name: The name of this test suite.
+config.name = "libclc"
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = [
+    ".cl",
+]
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.join(os.path.dirname(__file__))
+
+# test_exec_root: The root path where tests should be run.
+config.test_exec_root = os.path.join(config.test_run_dir, "test")
+
+llvm_config.use_default_substitutions()
+
+llvm_config.use_clang()
+
+tools = []
+tool_dirs = [config.llvm_tools_dir]
+
+llvm_config.add_tool_substitutions(tools, tool_dirs)
+
+# TODO: Consolidate the logic for turning on the internal shell by default for 
all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
+# We prefer the lit internal shell which provides a better user experience on 
failures
+# unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 env var.
+use_lit_shell = True
+lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if lit_shell_env:
+    use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
+
+config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
diff --git a/libclc/test/lit.site.cfg.py.in b/libclc/test/lit.site.cfg.py.in
new file mode 100644
index 0000000000000..a8618cf30c4fc
--- /dev/null
+++ b/libclc/test/lit.site.cfg.py.in
@@ -0,0 +1,23 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.llvm_src_root = path(r"@LLVM_SOURCE_DIR@")
+config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@")
+config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@"))
+config.llvm_libs_dir = lit_config.substitute(path(r"@LLVM_LIBS_DIR@"))
+config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@"))
+config.lit_tools_dir = path(r"@LLVM_LIT_TOOLS_DIR@")
+config.host_triple = "@LLVM_HOST_TRIPLE@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.host_arch = "@HOST_ARCH@"
+config.python_executable = "@Python3_EXECUTABLE@"
+config.libclc_src_dir = path(r"@LIBCLC_SOURCE_DIR@")
+config.test_run_dir = path(r"@LIBCLC_BINARY_DIR@")
+
+import lit.llvm
+lit.llvm.initialize(lit_config, config)
+
+# Let the main config do the real work.
+lit_config.load_config(
+    config, os.path.join(config.libclc_src_dir, "test/lit.cfg.py"))
diff --git a/libclc/test/math/cos.cl b/libclc/test/math/cos.cl
new file mode 100644
index 0000000000000..79272cce4d54d
--- /dev/null
+++ b/libclc/test/math/cos.cl
@@ -0,0 +1,296 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] 
!kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] 
!kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual 
[[META10:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], 
align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float> 
@llvm.fabs.v4f32(<4 x float> [[TMP0]])
+// CHECK-NEXT:    [[CMP_I_I:%.*]] = fcmp olt <4 x float> [[ELT_ABS_I_I_I]], 
splat (float 0x4160000000000000)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float 
0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01))
+// CHECK-NEXT:    [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> 
@llvm.trunc.v4f32(<4 x float> [[TMP1]])
+// CHECK-NEXT:    [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float> 
[[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000)
+// CHECK-NEXT:    [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> 
[[MUL_I30_I_I_I_I]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call noundef <4 x float> 
@llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 
0x3FF921FB40000000), <4 x float> [[FNEG_I31_I_I_I_I]])
+// CHECK-NEXT:    [[SUB_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], 
[[MUL_I30_I_I_I_I]]
+// CHECK-NEXT:    [[SUB2_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], 
[[SUB_I_I_I_I]]
+// CHECK-NEXT:    [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], 
[[MUL_I30_I_I_I_I]]
+// CHECK-NEXT:    [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]], 
[[TMP2]]
+// CHECK-NEXT:    [[ADD_I_I_I_I:%.*]] = fadd <4 x float> [[SUB_I_I_I_I]], 
[[SUB4_I_I_I_I]]
+// CHECK-NEXT:    [[MUL_I27_I_I_I_I:%.*]] = fmul <4 x float> 
[[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000)
+// CHECK-NEXT:    [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float> 
[[MUL_I27_I_I_I_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call noundef <4 x float> 
@llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 
0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]])
+// CHECK-NEXT:    [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], 
[[MUL_I27_I_I_I_I]]
+// CHECK-NEXT:    [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], 
[[SUB5_I_I_I_I]]
+// CHECK-NEXT:    [[SUB7_I_I_I_I:%.*]] = fsub <4 x float> [[SUB6_I_I_I_I]], 
[[MUL_I27_I_I_I_I]]
+// CHECK-NEXT:    [[SUB8_I_I_I_I:%.*]] = fsub <4 x float> [[SUB7_I_I_I_I]], 
[[TMP3]]
+// CHECK-NEXT:    [[ADD9_I_I_I_I:%.*]] = fadd <4 x float> [[SUB5_I_I_I_I]], 
[[SUB8_I_I_I_I]]
+// CHECK-NEXT:    [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], 
splat (float 0x3CF8469880000000)
+// CHECK-NEXT:    [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I_I_I_I_I]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call noundef <4 x float> 
@llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 
0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]])
+// CHECK-NEXT:    [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], 
[[MUL_I_I_I_I_I]]
+// CHECK-NEXT:    [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], 
[[SUB10_I_I_I_I]]
+// CHECK-NEXT:    [[SUB12_I_I_I_I:%.*]] = fsub <4 x float> [[SUB11_I_I_I_I]], 
[[MUL_I_I_I_I_I]]
+// CHECK-NEXT:    [[ADD13_I_I_I_I:%.*]] = fadd <4 x float> [[SUB10_I_I_I_I]], 
[[SUB12_I_I_I_I]]
+// CHECK-NEXT:    [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]]
+// CHECK-NEXT:    [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to 
<4 x i32>
+// CHECK-NEXT:    [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]] 
to <4 x i32>
+// CHECK-NEXT:    [[SHR_I_I_I:%.*]] = lshr <4 x i32> [[ASTYPE_I_I_I]], splat 
(i32 23)
+// CHECK-NEXT:    [[AND_I11_I_I:%.*]] = and <4 x i32> [[ASTYPE_I_I_I]], splat 
(i32 8388607)
+// CHECK-NEXT:    [[OR_I_I_I:%.*]] = or disjoint <4 x i32> [[AND_I11_I_I]], 
splat (i32 8388608)
+// CHECK-NEXT:    [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
-28220501)
+// CHECK-NEXT:    [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] 
to <4 x i64>
+// CHECK-NEXT:    [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 4266746795)
+// CHECK-NEXT:    [[SHR_I29_I_I:%.*]] = lshr <4 x i64> [[MUL_I28_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I29_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
1011060801)
+// CHECK-NEXT:    [[ADD_I_I_I:%.*]] = add <4 x i32> [[MUL2_I_I_I]], 
[[CONV_I2_I30_I_I]]
+// CHECK-NEXT:    [[MUL_I24_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 1011060801)
+// CHECK-NEXT:    [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I25_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD_I_I_I]], 
[[CONV_I2_I30_I_I]]
+// CHECK-NEXT:    [[SEXT_I_I1_I:%.*]] = zext <4 x i1> [[CMP_I_I_I]] to <4 x 
i32>
+// CHECK-NEXT:    [[ADD5_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT_I_I1_I]], 
[[CONV_I2_I26_I_I]]
+// CHECK-NEXT:    [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
-614296167)
+// CHECK-NEXT:    [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]], 
[[MUL6_I_I_I]]
+// CHECK-NEXT:    [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 3680671129)
+// CHECK-NEXT:    [[SHR_I21_I_I:%.*]] = lshr <4 x i64> [[MUL_I20_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I21_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]], 
[[ADD5_I_I_I]]
+// CHECK-NEXT:    [[SEXT10_I_I_I:%.*]] = zext <4 x i1> [[CMP9_I_I_I]] to <4 x 
i32>
+// CHECK-NEXT:    [[ADD13_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]]
+// CHECK-NEXT:    [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
-181084736)
+// CHECK-NEXT:    [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]], 
[[MUL14_I_I_I]]
+// CHECK-NEXT:    [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 4113882560)
+// CHECK-NEXT:    [[SHR_I17_I_I:%.*]] = lshr <4 x i64> [[MUL_I16_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I17_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]], 
[[ADD13_I_I_I]]
+// CHECK-NEXT:    [[SEXT18_I_I_I:%.*]] = zext <4 x i1> [[CMP17_I_I_I]] to <4 x 
i32>
+// CHECK-NEXT:    [[ADD21_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]]
+// CHECK-NEXT:    [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
-64530479)
+// CHECK-NEXT:    [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]], 
[[MUL22_I_I_I]]
+// CHECK-NEXT:    [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 4230436817)
+// CHECK-NEXT:    [[SHR_I13_I_I:%.*]] = lshr <4 x i64> [[MUL_I12_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I13_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]], 
[[ADD21_I_I_I]]
+// CHECK-NEXT:    [[SEXT26_I_I_I:%.*]] = zext <4 x i1> [[CMP25_I_I_I]] to <4 x 
i32>
+// CHECK-NEXT:    [[ADD29_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]]
+// CHECK-NEXT:    [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
1313084713)
+// CHECK-NEXT:    [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]], 
[[MUL30_I_I_I]]
+// CHECK-NEXT:    [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 1313084713)
+// CHECK-NEXT:    [[SHR_I9_I_I:%.*]] = lshr <4 x i64> [[MUL_I8_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I9_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]], 
[[ADD29_I_I_I]]
+// CHECK-NEXT:    [[SEXT34_I_I_I:%.*]] = zext <4 x i1> [[CMP33_I_I_I]] to <4 x 
i32>
+// CHECK-NEXT:    [[ADD37_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]]
+// CHECK-NEXT:    [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 
-1560706194)
+// CHECK-NEXT:    [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]], 
[[MUL38_I_I_I]]
+// CHECK-NEXT:    [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64> 
[[CONV_I1_I27_I_I]], splat (i64 2734261102)
+// CHECK-NEXT:    [[SHR_I6_I_I:%.*]] = lshr <4 x i64> [[MUL_I5_I_I]], splat 
(i64 32)
+// CHECK-NEXT:    [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> 
[[SHR_I6_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]], 
[[ADD37_I_I_I]]
+// CHECK-NEXT:    [[SEXT42_I_I_I:%.*]] = zext <4 x i1> [[CMP41_I_I_I]] to <4 x 
i32>
+// CHECK-NEXT:    [[ADD45_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[SEXT42_I_I_I]], [[CONV_I2_I_I_I]]
+// CHECK-NEXT:    [[SUB47_I_I_I:%.*]] = add nsw <4 x i32> [[SHR_I_I_I]], splat 
(i32 -120)
+// CHECK-NEXT:    [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]]
+// CHECK-NEXT:    [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]]
+// CHECK-NEXT:    [[COND55_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[ADD23_I_I_I]], <4 x i32> [[ADD31_I_I_I]]
+// CHECK-NEXT:    [[COND57_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]]
+// CHECK-NEXT:    [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]]
+// CHECK-NEXT:    [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]]
+// CHECK-NEXT:    [[COND63_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> [[MUL_I_I_I]], <4 x i32> [[ADD_I_I_I]]
+// CHECK-NEXT:    [[DOTNEG_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x 
i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]], 
[[SUB47_I_I_I]]
+// CHECK-NEXT:    [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB66_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[COND70_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x 
i32> [[COND53_I_I_I]], <4 x i32> [[COND51_I_I_I]]
+// CHECK-NEXT:    [[COND72_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x 
i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]]
+// CHECK-NEXT:    [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x 
i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]]
+// CHECK-NEXT:    [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x 
i32> [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]]
+// CHECK-NEXT:    [[COND78_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x 
i32> [[COND61_I_I_I]], <4 x i32> [[COND59_I_I_I]]
+// CHECK-NEXT:    [[COND80_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x 
i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]]
+// CHECK-NEXT:    [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], 
<4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], 
[[SUB66_I_I_I]]
+// CHECK-NEXT:    [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[COND87_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x 
i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]]
+// CHECK-NEXT:    [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x 
i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]]
+// CHECK-NEXT:    [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x 
i32> [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]]
+// CHECK-NEXT:    [[COND93_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x 
i32> [[COND78_I_I_I]], <4 x i32> [[COND76_I_I_I]]
+// CHECK-NEXT:    [[COND95_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x 
i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]]
+// CHECK-NEXT:    [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], 
<4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], 
[[SUB83_I_I_I]]
+// CHECK-NEXT:    [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[COND102_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 
x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]]
+// CHECK-NEXT:    [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 
x i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]]
+// CHECK-NEXT:    [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 
x i32> [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]]
+// CHECK-NEXT:    [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 
x i32> [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]]
+// CHECK-NEXT:    [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], 
<4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB111_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, 
[[SUB98_I_I_I]]
+// CHECK-NEXT:    [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> 
[[DOTNEG381_I_I_I]], [[SUB111_I_I_I]]
+// CHECK-NEXT:    [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32> splat (i32 24), 
[[SHR_I_I_I]]
+// CHECK-NEXT:    [[SHL_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB47_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[SHL_I_I_I:%.*]] = shl <4 x i32> [[COND102_I_I_I]], 
[[SHL_MASK_I_I_I]]
+// CHECK-NEXT:    [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]], 
[[SHR_MASK_I_I_I]]
+// CHECK-NEXT:    [[OR117_I_I_I:%.*]] = or <4 x i32> [[SHL_I_I_I]], 
[[SHR116_I_I_I]]
+// CHECK-NEXT:    [[SHL120_I_I_I:%.*]] = shl <4 x i32> [[COND104_I_I_I]], 
[[SHL_MASK_I_I_I]]
+// CHECK-NEXT:    [[SHR122_I_I_I:%.*]] = lshr <4 x i32> [[COND106_I_I_I]], 
[[SHR_MASK_I_I_I]]
+// CHECK-NEXT:    [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], 
[[SHR122_I_I_I]]
+// CHECK-NEXT:    [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]], 
[[SHL_MASK_I_I_I]]
+// CHECK-NEXT:    [[SHR128_I_I_I:%.*]] = lshr <4 x i32> [[COND108_I_I_I]], 
[[SHR_MASK_I_I_I]]
+// CHECK-NEXT:    [[OR129_I_I_I:%.*]] = or <4 x i32> [[SHL126_I_I_I]], 
[[SHR128_I_I_I]]
+// CHECK-NEXT:    [[COND131_I_I_I:%.*]] = select <4 x i1> 
[[CMP112_NOT_I_I_I]], <4 x i32> [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]]
+// CHECK-NEXT:    [[COND133_I_I_I:%.*]] = select <4 x i1> 
[[CMP112_NOT_I_I_I]], <4 x i32> [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]]
+// CHECK-NEXT:    [[COND135_I_I_I:%.*]] = select <4 x i1> 
[[CMP112_NOT_I_I_I]], <4 x i32> [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]]
+// CHECK-NEXT:    [[SHR136_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], 
splat (i32 29)
+// CHECK-NEXT:    [[OR139_I_I_I:%.*]] = tail call <4 x i32> 
@llvm.fshl.v4i32(<4 x i32> [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x 
i32> splat (i32 2))
+// CHECK-NEXT:    [[OR142_I_I_I:%.*]] = tail call <4 x i32> 
@llvm.fshl.v4i32(<4 x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x 
i32> splat (i32 2))
+// CHECK-NEXT:    [[OR145_I_I_I:%.*]] = tail call <4 x i32> 
@llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4 x 
i32> splat (i32 2))
+// CHECK-NEXT:    [[AND146_I_I_I:%.*]] = and <4 x i32> [[SHR136_I_I_I]], splat 
(i32 1)
+// CHECK-NEXT:    [[SEXT148_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, 
[[AND146_I_I_I]]
+// CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i32> [[SEXT148_I_I_I]], splat (i32 
-2147483648)
+// CHECK-NEXT:    [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], 
[[SEXT148_I_I_I]]
+// CHECK-NEXT:    [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]], 
[[SEXT148_I_I_I]]
+// CHECK-NEXT:    [[XOR157_I_I_I:%.*]] = xor <4 x i32> [[OR145_I_I_I]], 
[[SEXT148_I_I_I]]
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 0
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP6]], i1 false)
+// CHECK-NEXT:    [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32 
[[TMP7]], i64 0
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 1
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP8]], i1 false)
+// CHECK-NEXT:    [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> 
[[VECINIT_I1_I_I]], i32 [[TMP9]], i64 1
+// CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 2
+// CHECK-NEXT:    [[TMP11:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP10]], i1 false)
+// CHECK-NEXT:    [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> 
[[VECINIT2_I2_I_I]], i32 [[TMP11]], i64 2
+// CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 3
+// CHECK-NEXT:    [[TMP13:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP12]], i1 false)
+// CHECK-NEXT:    [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> 
[[VECINIT4_I3_I_I]], i32 [[TMP13]], i64 3
+// CHECK-NEXT:    [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[VECINIT6_I4_I_I]], splat (i32 1)
+// CHECK-NEXT:    [[SHL_MASK162_I_I_I:%.*]] = and <4 x i32> [[ADD159_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], 
[[SHL_MASK162_I_I_I]]
+// CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat 
(i32 31)
+// CHECK-NEXT:    [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat 
(i32 31)
+// CHECK-NEXT:    [[SHR165_I_I_I:%.*]] = lshr <4 x i32> [[XOR156_I_I_I]], 
[[SHR_MASK164_I_I_I]]
+// CHECK-NEXT:    [[OR166_I_I_I:%.*]] = or <4 x i32> [[SHL163_I_I_I]], 
[[SHR165_I_I_I]]
+// CHECK-NEXT:    [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], 
[[SHL_MASK162_I_I_I]]
+// CHECK-NEXT:    [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]], 
[[SHR_MASK164_I_I_I]]
+// CHECK-NEXT:    [[OR172_I_I_I:%.*]] = or <4 x i32> [[SHL169_I_I_I]], 
[[SHR171_I_I_I]]
+// CHECK-NEXT:    [[SHR176_I_I_I:%.*]] = lshr <4 x i32> [[OR166_I_I_I]], splat 
(i32 9)
+// CHECK-NEXT:    [[TMP15:%.*]] = shl nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], 
splat (i32 23)
+// CHECK-NEXT:    [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], 
[[TMP15]]
+// CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 
1056964608)
+// CHECK-NEXT:    [[OR177_I_I_I:%.*]] = or <4 x i32> [[TMP16]], [[TMP5]]
+// CHECK-NEXT:    [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] 
to <4 x float>
+// CHECK-NEXT:    [[OR181_I_I_I:%.*]] = tail call <4 x i32> 
@llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x 
i32> splat (i32 23))
+// CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], 
i64 0
+// CHECK-NEXT:    [[TMP18:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP17]], i1 false)
+// CHECK-NEXT:    [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 
[[TMP18]], i64 0
+// CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], 
i64 1
+// CHECK-NEXT:    [[TMP20:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP19]], i1 false)
+// CHECK-NEXT:    [[VECINIT2_I_I_I:%.*]] = insertelement <4 x i32> 
[[VECINIT_I_I_I]], i32 [[TMP20]], i64 1
+// CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], 
i64 2
+// CHECK-NEXT:    [[TMP22:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP21]], i1 false)
+// CHECK-NEXT:    [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32> 
[[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2
+// CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], 
i64 3
+// CHECK-NEXT:    [[TMP24:%.*]] = tail call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[TMP23]], i1 false)
+// CHECK-NEXT:    [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32> 
[[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3
+// CHECK-NEXT:    [[ADD183_I_I_NEG_I:%.*]] = xor <4 x i32> [[VECINIT6_I_I_I]], 
splat (i32 -1)
+// CHECK-NEXT:    [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[VECINIT6_I_I_I]], splat (i32 1)
+// CHECK-NEXT:    [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]], 
splat (i32 31)
+// CHECK-NEXT:    [[SHL187_I_I_I:%.*]] = shl <4 x i32> [[OR181_I_I_I]], 
[[SHL_MASK186_I_I_I]]
+// CHECK-NEXT:    [[TMP25:%.*]] = and <4 x i32> [[VECINIT6_I_I_I]], splat (i32 
31)
+// CHECK-NEXT:    [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat 
(i32 31)
+// CHECK-NEXT:    [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]], 
[[SHR_MASK189_I_I_I]]
+// CHECK-NEXT:    [[OR191_I_I_I:%.*]] = or <4 x i32> [[SHL187_I_I_I]], 
[[SHR190_I_I_I]]
+// CHECK-NEXT:    [[REASS_SUB10_I:%.*]] = sub nuw nsw <4 x i32> 
[[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]]
+// CHECK-NEXT:    [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], 
splat (i32 23)
+// CHECK-NEXT:    [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat 
(i32 9)
+// CHECK-NEXT:    [[REASS_SUB16_I_I:%.*]] = add <4 x i32> 
[[ADD193_I_I_NEG_I]], splat (i32 864026624)
+// CHECK-NEXT:    [[TMP26:%.*]] = or disjoint <4 x i32> [[SHR197_I_I_I]], 
[[REASS_SUB16_I_I]]
+// CHECK-NEXT:    [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]]
+// CHECK-NEXT:    [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] 
to <4 x float>
+// CHECK-NEXT:    [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]], 
splat (float 0x3FF921FB40000000)
+// CHECK-NEXT:    [[FNEG_I_I_I:%.*]] = fneg <4 x float> [[MUL200_I_I_I]]
+// CHECK-NEXT:    [[TMP27:%.*]] = tail call noundef <4 x float> 
@llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 
0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]])
+// CHECK-NEXT:    [[TMP28:%.*]] = tail call noundef <4 x float> 
@llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 
0x3E74442D00000000), <4 x float> [[TMP27]])
+// CHECK-NEXT:    [[TMP29:%.*]] = tail call noundef <4 x float> 
@llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat (float 
0x3FF921FB40000000), <4 x float> [[TMP28]])
+// CHECK-NEXT:    [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], 
[[TMP29]]
+// CHECK-NEXT:    [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]], 
[[MUL200_I_I_I]]
+// CHECK-NEXT:    [[SUB206_I_I_I:%.*]] = fsub <4 x float> [[TMP29]], 
[[SUB205_I_I_I]]
+// CHECK-NEXT:    [[SHR207_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], 
splat (i32 30)
+// CHECK-NEXT:    [[ADD209_I_I_I:%.*]] = add nuw nsw <4 x i32> 
[[AND146_I_I_I]], [[SHR207_I_I_I]]
+// CHECK-NEXT:    [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x 
float> [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]]
+// CHECK-NEXT:    [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x 
float> [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]]
+// CHECK-NEXT:    [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> 
[[CONV_I_I_I]], <4 x i32> [[ADD209_I_I_I]]
+// CHECK-NEXT:    [[COND6_I_I:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat 
(i32 2)
+// CHECK-NEXT:    [[MUL_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], 
[[COND_V_I2_I]]
+// CHECK-NEXT:    [[MUL1_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], 
[[MUL_I_I]]
+// CHECK-NEXT:    [[TMP30:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 
0x3DE5D93A60000000), <4 x float> splat (float 0xBE5AE5E680000000))
+// CHECK-NEXT:    [[TMP31:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4 x float> 
splat (float 0x3EC6DBE4A0000000))
+// CHECK-NEXT:    [[TMP32:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP31]], <4 x float> 
splat (float 0xBF2A013A80000000))
+// CHECK-NEXT:    [[TMP33:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> 
splat (float 0x3F811110E0000000))
+// CHECK-NEXT:    [[FNEG_I3_I:%.*]] = fneg <4 x float> [[MUL1_I_I]]
+// CHECK-NEXT:    [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]], [[FNEG_I3_I]]
+// CHECK-NEXT:    [[TMP34:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat (float 
5.000000e-01), <4 x float> [[MUL5_I_I]])
+// CHECK-NEXT:    [[FNEG7_I_I:%.*]] = fneg <4 x float> [[COND4_V_I_I]]
+// CHECK-NEXT:    [[TMP35:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP34]], <4 x float> 
[[FNEG7_I_I]])
+// CHECK-NEXT:    [[TMP36:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x float> splat (float 
0x3FC5555560000000), <4 x float> [[TMP35]])
+// CHECK-NEXT:    [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]]
+// CHECK-NEXT:    [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]]
+// CHECK-NEXT:    [[TMP37:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 
0xBDA8FAE9C0000000), <4 x float> splat (float 0x3E21EE9EC0000000))
+// CHECK-NEXT:    [[TMP38:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4 x float> 
splat (float 0xBE92524740000000))
+// CHECK-NEXT:    [[TMP39:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP38]], <4 x float> 
splat (float 0x3EFA015C40000000))
+// CHECK-NEXT:    [[TMP40:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> 
splat (float 0xBF56C16C00000000))
+// CHECK-NEXT:    [[TMP41:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4 x float> 
splat (float 0x3FA5555560000000))
+// CHECK-NEXT:    [[MUL5_I5_I:%.*]] = fmul <4 x float> [[MUL_I_I]], [[TMP41]]
+// CHECK-NEXT:    [[TMP42:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x 
float> [[COND_V_I2_I]])
+// CHECK-NEXT:    [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32>
+// CHECK-NEXT:    [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 
-16777216)
+// CHECK-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 
-1050253722)
+// CHECK-NEXT:    [[AND938_I_I:%.*]] = icmp ult <4 x i32> [[TMP43]], splat 
(i32 11429479)
+// CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[AND938_I_I]], <4 x i32> 
[[SUB_I6_I]], <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]], 
splat (i32 1061683200)
+// CHECK-NEXT:    [[COND14_I_I:%.*]] = select <4 x i1> [[CMP11_I_I]], <4 x 
i32> splat (i32 1049624576), <4 x i32> [[TMP44]]
+// CHECK-NEXT:    [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x 
float>
+// CHECK-NEXT:    [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]]
+// CHECK-NEXT:    [[TMP46:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 
5.000000e-01), <4 x float> [[FNEG_I7_I]])
+// CHECK-NEXT:    [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 
1.000000e+00), [[TMP45]]
+// CHECK-NEXT:    [[FNEG17_I_I:%.*]] = fneg <4 x float> [[COND_V_I2_I]]
+// CHECK-NEXT:    [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]], 
[[FNEG17_I_I]]
+// CHECK-NEXT:    [[TMP47:%.*]] = tail call noundef <4 x float> 
@llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x 
float> [[MUL18_I_I]])
+// CHECK-NEXT:    [[TMP48:%.*]] = fsub <4 x float> [[TMP47]], [[TMP46]]
+// CHECK-NEXT:    [[SUB21_I_I:%.*]] = fadd <4 x float> [[SUB16_I_I]], [[TMP48]]
+// CHECK-NEXT:    [[TMP49:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 1)
+// CHECK-NEXT:    [[TMP50:%.*]] = icmp eq <4 x i32> [[TMP49]], zeroinitializer
+// CHECK-NEXT:    [[COND_V_I_I:%.*]] = select <4 x i1> [[TMP50]], <4 x float> 
[[SUB21_I_I]], <4 x float> [[FNEG_I_I]]
+// CHECK-NEXT:    [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 
x i32>
+// CHECK-NEXT:    [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]], 
zeroinitializer
+// CHECK-NEXT:    [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[SHL_I_I:%.*]] = shl nsw <4 x i32> [[SEXT6_I_I]], splat 
(i32 31)
+// CHECK-NEXT:    [[XOR_I_I:%.*]] = xor <4 x i32> [[SHL_I_I]], [[COND_I_I]]
+// CHECK-NEXT:    [[ASTYPE7_I_I:%.*]] = bitcast <4 x i32> [[XOR_I_I]] to <4 x 
float>
+// CHECK-NEXT:    [[TMP51:%.*]] = fcmp ueq <4 x float> [[ELT_ABS_I_I_I]], 
splat (float 0x7FF0000000000000)
+// CHECK-NEXT:    [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x 
float> splat (float 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]]
+// CHECK-NEXT:    store <4 x float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], 
align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global float4 *f) {
+  *f = cos(*f);
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"float4*"}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/math/fabs.cl b/libclc/test/math/fabs.cl
new file mode 100644
index 0000000000000..5935fc9f62627
--- /dev/null
+++ b/libclc/test/math/fabs.cl
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] 
!kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] 
!kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4, 
!tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT:    [[ELT_ABS_I_I:%.*]] = tail call noundef float 
@llvm.fabs.f32(float [[TMP0]])
+// CHECK-NEXT:    store float [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 
4, !tbaa [[TBAA10]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global float *f) {
+  *f = fabs(*f);
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"float*"}
+// CHECK: [[META9]] = !{!""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"float", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/math/rsqrt.cl b/libclc/test/math/rsqrt.cl
new file mode 100644
index 0000000000000..fa5e13f2705f2
--- /dev/null
+++ b/libclc/test/math/rsqrt.cl
@@ -0,0 +1,48 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#if defined(cl_khr_fp64)
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) 
initializes((16, 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 
captures(none) initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr 
#[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] 
!kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] 
!kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual 
[[META10:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[X]], 
align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call contract <4 x float> 
@llvm.sqrt.v4f32(<4 x float> [[TMP0]]), !fpmath [[META14:![0-9]+]]
+// CHECK-NEXT:    [[DIV_I_I:%.*]] = fdiv contract <4 x float> splat (float 
1.000000e+00), [[TMP1]], !fpmath [[META15:![0-9]+]]
+// CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr 
addrspace(1) [[X]], i64 16
+// CHECK-NEXT:    store <4 x float> [[DIV_I_I]], ptr addrspace(1) 
[[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1) [[Y]], 
align 32, !tbaa [[TBAA11]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call contract <4 x double> 
@llvm.sqrt.v4f64(<4 x double> [[TMP2]])
+// CHECK-NEXT:    [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 
1.000000e+00), [[TMP3]]
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr 
addrspace(1) [[Y]], i64 32
+// CHECK-NEXT:    store <4 x double> [[DIV_I_I1]], ptr addrspace(1) 
[[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global float4 *x, __global double4 *y) {
+  x[1] = rsqrt(x[0]);
+  y[1] = rsqrt(y[0]);
+}
+
+#endif
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none"}
+// CHECK: [[META8]] = !{!"float4*", !"double4*"}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", 
!"double __attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!"", !""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META14]] = !{float 3.000000e+00}
+// CHECK: [[META15]] = !{float 2.500000e+00}
+//.
diff --git a/libclc/test/misc/as_type.cl b/libclc/test/misc/as_type.cl
new file mode 100644
index 0000000000000..a475956e62d0a
--- /dev/null
+++ b/libclc/test/misc/as_type.cl
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) 
initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 
captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] 
!kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual 
[[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type 
[[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[Y]], align 
16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[X]], align 16, 
!tbaa [[TBAA11]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global int4 *x, __global float4 *y) {
+  *x = as_int4(*y);
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none"}
+// CHECK: [[META8]] = !{!"int4*", !"float4*"}
+// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float 
__attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!"", !""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/misc/convert.cl b/libclc/test/misc/convert.cl
new file mode 100644
index 0000000000000..cd8c41465ed38
--- /dev/null
+++ b/libclc/test/misc/convert.cl
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) 
initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 
captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] 
!kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual 
[[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type 
[[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[Y]], 
align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    store <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 
16, !tbaa [[TBAA11]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global int4 *x, __global float4 *y) {
+  *x = convert_int4(*y);
+}
+//.
+// CHECK: [[META6]] = !{i32 1, i32 1}
+// CHECK: [[META7]] = !{!"none", !"none"}
+// CHECK: [[META8]] = !{!"int4*", !"float4*"}
+// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float 
__attribute__((ext_vector_type(4)))*"}
+// CHECK: [[META10]] = !{!"", !""}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/libclc/test/rsqrt.cl b/libclc/test/rsqrt.cl
deleted file mode 100644
index 4eebfe8ecf7f9..0000000000000
--- a/libclc/test/rsqrt.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-__kernel void foo(float4 *x, double4 *y) {
-  x[1] = rsqrt(x[0]);
-  y[1] = rsqrt(y[0]);
-}
diff --git a/libclc/test/subsat.cl b/libclc/test/subsat.cl
deleted file mode 100644
index 5e6fbdcfbef9e..0000000000000
--- a/libclc/test/subsat.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-__kernel void test_subsat_char(char *a, char x, char y) {
-  *a = sub_sat(x, y);
-  return;
-}
-
-__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) {
-  *a = sub_sat(x, y);
-  return;
-}
-
-__kernel void test_subsat_long(long *a, long x, long y) {
-  *a = sub_sat(x, y);
-  return;
-}
-
-__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) {
-  *a = sub_sat(x, y);
-  return;
-}
\ No newline at end of file
diff --git a/libclc/test/work-item/get_group_id.cl 
b/libclc/test/work-item/get_group_id.cl
new file mode 100644
index 0000000000000..f73f8f76cd519
--- /dev/null
+++ b/libclc/test/work-item/get_group_id.cl
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - 
--libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | 
FileCheck %s
+
+// CHECK-LABEL: define protected amdgpu_kernel void @foo(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none) 
[[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space 
[[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type 
[[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual 
[[META9:![0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x()
+// CHECK-NEXT:    [[RETVAL_0_I:%.*]] = zext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i32, ptr 
addrspace(1) [[I]], i64 [[RETVAL_0_I]]
+// CHECK-NEXT:    store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa 
[[TBAA10:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+__kernel void foo(__global int *i) {
+  i[get_group_id(0)] = 1;
+}
+//.
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"int*"}
+// CHECK: [[META9]] = !{!""}
+// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"int", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
+// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+//.

>From e79ba7281bd21e8a145d325a706db8e56d7468dc Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 7 Jul 2025 13:01:49 +0100
Subject: [PATCH 9/9] fix CI?

---
 .ci/compute_projects.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.ci/compute_projects.py b/.ci/compute_projects.py
index c3cf714ce6c10..4c268b7221663 100644
--- a/.ci/compute_projects.py
+++ b/.ci/compute_projects.py
@@ -46,6 +46,7 @@
         "mlir",
         "polly",
         "flang",
+        "libclc",
     },
     "lld": {"bolt", "cross-project-tests"},
     # TODO(issues/132795): LLDB should be enabled on clang changes.
@@ -75,7 +76,7 @@
 # This mapping describes runtimes that should be tested when the key project is
 # touched.
 DEPENDENT_RUNTIMES_TO_TEST = {
-    "clang": {"compiler-rt"},
+    "clang": {"compiler-rt", "libclc"},
     "clang-tools-extra": {"libc"},
     "libc": {"libc"},
     ".ci": {"compiler-rt", "libc"},
@@ -132,6 +133,7 @@
     "lld": "check-lld",
     "flang": "check-flang",
     "libc": "check-libc",
+    "libclc": "check-libclc",
     "lld": "check-lld",
     "lldb": "check-lldb",
     "mlir": "check-mlir",
@@ -139,7 +141,7 @@
     "polly": "check-polly",
 }
 
-RUNTIMES = {"libcxx", "libcxxabi", "libunwind", "compiler-rt", "libc"}
+RUNTIMES = {"libcxx", "libcxxabi", "libunwind", "compiler-rt", "libc", 
"libclc"}
 
 
 def _add_dependencies(projects: Set[str], runtimes: Set[str]) -> Set[str]:

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to