https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/87989
>From b41b2032fdb01bd91d32255bf22a94315b58a017 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 30 Jun 2025 10:59:02 +0100 Subject: [PATCH 1/9] [libclc] Place libclc files in clang's resource dir --- libclc/CMakeLists.txt | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index e2871d1b01a16..8bc3a75739fcd 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -63,6 +63,9 @@ if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DI set( ${tool}_target ) endforeach() endif() + + # Setup the paths where libclc runtimes should be stored. + set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) else() # In-tree configuration set( LIBCLC_STANDALONE_BUILD FALSE ) @@ -82,10 +85,14 @@ else() get_host_tool_path( llvm-link LLVM_LINK llvm-link_exe llvm-link_target ) get_host_tool_path( opt OPT opt_exe opt_target ) endif() -endif() -# Setup the paths where libclc runtimes should be stored. -set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) + # Setup the paths where libclc runtimes should be stored. By default, in an + # in-tree build we place the libraries in clang's resource driectory. + get_clang_resource_dir( LIBCLC_OUTPUT_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. ) + + # Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR. + set( LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_OUTPUT_DIR}/lib/libclc ) +endif() if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} ) message( WARNING "Using custom LLVM tools to build libclc: " >From 4057b6af7db11b59878739bff4d826a9fc616bbf Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 1 Jul 2025 10:56:54 +0100 Subject: [PATCH 2/9] [clang] Add the ability to link libclc OpenCL libraries This commit adds driver support for linking libclc OpenCL libraries. It takes the form of a new optional flag: --libclc-lib=namespec. Nothing is linked unless this flag is specified. Not all libclc targets have corresponding clang targets. For this reason it is desirable for users to be able to specify a libclc library name. We support this by taking both a library name (without the .bc suffix) or a filename. Both of these are searched for in the clang resource directory or in the LIBRARY_PATH environment variable. Filenames are also checked themselves so that absolute paths can be provided. The syntax for specifying filenames (as opposed to library names) uses a leading colon (:), inspired by the -l option. To accommodate this option, libclc libraries are now placed into clang's resource directory in an in-tree configuration. The aliases are not currently placed there to avoid polluting the directory, but that can be changed. The libraries are all placed in <resource-dir>/lib/libclc and are not grouped under host-specific directories as some other runtime libraries are; it is not expected that OpenCL libraries will differ depending on the host toolchain. Currently only the AMDGPU toolchain supports this option as a proof of concept. Other targets such as NVPTX or SPIR/SPIR-V could support it too. We could optionally let target toolchains search for libclc libraries themselves, possibly when passed an empty --libclc-lib. --- .../clang/Basic/DiagnosticDriverKinds.td | 3 + clang/include/clang/Driver/CommonArgs.h | 3 + clang/include/clang/Driver/Options.td | 2 + clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 + clang/lib/Driver/ToolChains/CommonArgs.cpp | 59 +++++++++++++++++++ clang/test/Driver/Inputs/libclc/libclc.bc | 0 .../Driver/Inputs/libclc/subdir/libclc.bc | 0 clang/test/Driver/opencl-libclc.cl | 10 ++++ 8 files changed, 79 insertions(+) create mode 100644 clang/test/Driver/Inputs/libclc/libclc.bc create mode 100644 clang/test/Driver/Inputs/libclc/subdir/libclc.bc create mode 100644 clang/test/Driver/opencl-libclc.cl diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 34b6c0d7a8acd..019161c22a24f 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -392,6 +392,9 @@ def warn_drv_fraw_string_literals_in_cxx11 : Warning< "ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">, InGroup<UnusedCommandLineArgument>; +def err_drv_libclc_not_found : Error< + "no libclc library '%0' found in the clang resource directory or in LIBRARY_PATH">; + def err_drv_invalid_malign_branch_EQ : Error< "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; diff --git a/clang/include/clang/Driver/CommonArgs.h b/clang/include/clang/Driver/CommonArgs.h index 26aa3ccf84786..7e8ab82eb7863 100644 --- a/clang/include/clang/Driver/CommonArgs.h +++ b/clang/include/clang/Driver/CommonArgs.h @@ -215,6 +215,9 @@ void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs, StringRef BitcodeSuffix, const llvm::Triple &Triple, const ToolChain &HostTC); +void addOpenCLBuiltinsLib(const Driver &D, const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args); + void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 54c71b066f9d4..cf24bcac9c07c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1425,6 +1425,8 @@ def openacc_macro_override_EQ // End Clang specific/exclusive options for OpenACC. +def libclc_lib_EQ : Joined<["--"], "libclc-lib=">, Group<opencl_Group>, + HelpText<"Namespec of libclc OpenCL bitcode library to link">; def libomptarget_amdgpu_bc_path_EQ : Joined<["--"], "libomptarget-amdgpu-bc-path=">, Group<i_Group>, HelpText<"Path to libomptarget-amdgcn bitcode library">; def libomptarget_amdgcn_bc_path_EQ : Joined<["--"], "libomptarget-amdgcn-bc-path=">, Group<i_Group>, diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index b7564a0495da8..e6d1baa2a1caa 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -795,6 +795,8 @@ void AMDGPUToolChain::addClangTargetOptions( CC1Args.push_back("-fvisibility=hidden"); CC1Args.push_back("-fapply-global-visibility-to-externs"); } + + addOpenCLBuiltinsLib(getDriver(), DriverArgs, CC1Args); } void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index bdd77ac84913c..40aaa7ca324be 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2942,6 +2942,65 @@ void tools::addHIPRuntimeLibArgs(const ToolChain &TC, Compilation &C, } } +void tools::addOpenCLBuiltinsLib(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) { + // Check whether user specifies a libclc bytecode library + if (const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ)) { + SmallVector<StringRef, 8> LibraryPaths; + + // Add user defined library paths from LIBRARY_PATH. + std::optional<std::string> LibPath = + llvm::sys::Process::GetEnv("LIBRARY_PATH"); + if (LibPath) { + SmallVector<StringRef, 8> Frags; + const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'}; + llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr); + for (StringRef Path : Frags) + LibraryPaths.emplace_back(Path.trim()); + } + + // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/ + SmallString<128> LibclcPath(D.ResourceDir); + llvm::sys::path::append(LibclcPath, "lib", "libclc"); + LibraryPaths.emplace_back(LibclcPath); + + bool FoundBCLibrary = false; + StringRef LibclcNamespec(A->getValue()); + + // If the namespec is of the form :filename, search for that file. + bool FilenameSearch = LibclcNamespec.starts_with(":"); + SmallString<128> LibclcTargetFile( + LibclcNamespec.drop_front(FilenameSearch ? 1 : 0)); + + if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) { + FoundBCLibrary = true; + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile)); + } else { + // Search the library paths for the file + if (!FilenameSearch) + LibclcTargetFile += ".bc"; + + for (StringRef LibraryPath : LibraryPaths) { + SmallString<128> LibclcPath(LibraryPath); + llvm::sys::path::append(LibclcPath, LibclcTargetFile); + if (llvm::sys::fs::exists(LibclcPath)) { + FoundBCLibrary = true; + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath)); + break; + } + } + } + + // Since the user requested a library, if we haven't one then report an + // error. + if (!FoundBCLibrary) + D.Diag(diag::err_drv_libclc_not_found) << LibclcTargetFile; + } +} + void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, diff --git a/clang/test/Driver/Inputs/libclc/libclc.bc b/clang/test/Driver/Inputs/libclc/libclc.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/libclc/subdir/libclc.bc b/clang/test/Driver/Inputs/libclc/subdir/libclc.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl new file mode 100644 index 0000000000000..34b863b128682 --- /dev/null +++ b/clang/test/Driver/opencl-libclc.cl @@ -0,0 +1,10 @@ +// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s +// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s +// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s + +// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR + +// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc +// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc >From bde5f5325e17525820bc145abb1f4c43e358d59b Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 1 Jul 2025 12:26:22 +0100 Subject: [PATCH 3/9] adjust LIBRARY_PATH usage in tests --- clang/test/Driver/opencl-libclc.cl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl index 34b863b128682..8608a4f741b23 100644 --- a/clang/test/Driver/opencl-libclc.cl +++ b/clang/test/Driver/opencl-libclc.cl @@ -1,10 +1,10 @@ // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s -// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s -// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s +// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s +// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s -// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR -// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR -// RUN: env LIBRARY_PATH=%S/Inputs/libclc:$LIBRARY_PATH %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR // CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc // CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc >From 5496f2dd3b449895f8de880f937e6fe3c8f29cd8 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 1 Jul 2025 12:50:15 +0100 Subject: [PATCH 4/9] fix path checks --- clang/test/Driver/opencl-libclc.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl index 8608a4f741b23..b7780d1af8d40 100644 --- a/clang/test/Driver/opencl-libclc.cl +++ b/clang/test/Driver/opencl-libclc.cl @@ -6,5 +6,5 @@ // RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR // RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR -// CHECK: -mlink-builtin-bitcode{{.*}}Inputs/libclc/libclc.bc -// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs/libclc/subdir/libclc.bc +// CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc +// CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc >From df02a9e5a46206d5ed4880c293fbd0faf6728db2 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 7 Jul 2025 12:06:35 +0100 Subject: [PATCH 5/9] consume_front --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 40aaa7ca324be..e6d9974d734bf 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2969,9 +2969,8 @@ void tools::addOpenCLBuiltinsLib(const Driver &D, StringRef LibclcNamespec(A->getValue()); // If the namespec is of the form :filename, search for that file. - bool FilenameSearch = LibclcNamespec.starts_with(":"); - SmallString<128> LibclcTargetFile( - LibclcNamespec.drop_front(FilenameSearch ? 1 : 0)); + bool FilenameSearch = LibclcNamespec.consume_front(":"); + SmallString<128> LibclcTargetFile(LibclcNamespec); if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) { FoundBCLibrary = true; >From 00b32eac8f3a06f9380a2cefeb20bef4e829d0fc Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 7 Jul 2025 12:23:10 +0100 Subject: [PATCH 6/9] remove logic for LIBRARY_PATH --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 67 ++++++++-------------- clang/test/Driver/opencl-libclc.cl | 7 +-- 2 files changed, 24 insertions(+), 50 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e6d9974d734bf..36f335154e6bc 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2946,57 +2946,36 @@ void tools::addOpenCLBuiltinsLib(const Driver &D, const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) { // Check whether user specifies a libclc bytecode library - if (const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ)) { - SmallVector<StringRef, 8> LibraryPaths; + const Arg *A = DriverArgs.getLastArg(options::OPT_libclc_lib_EQ); + if (!A) + return; - // Add user defined library paths from LIBRARY_PATH. - std::optional<std::string> LibPath = - llvm::sys::Process::GetEnv("LIBRARY_PATH"); - if (LibPath) { - SmallVector<StringRef, 8> Frags; - const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'}; - llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr); - for (StringRef Path : Frags) - LibraryPaths.emplace_back(Path.trim()); - } + // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/ + SmallString<128> LibclcPath(D.ResourceDir); + llvm::sys::path::append(LibclcPath, "lib", "libclc"); - // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/libclc/ - SmallString<128> LibclcPath(D.ResourceDir); - llvm::sys::path::append(LibclcPath, "lib", "libclc"); - LibraryPaths.emplace_back(LibclcPath); + // If the namespec is of the form :filename, search for that file. + StringRef LibclcNamespec(A->getValue()); + bool FilenameSearch = LibclcNamespec.consume_front(":"); + SmallString<128> LibclcTargetFile(LibclcNamespec); - bool FoundBCLibrary = false; - StringRef LibclcNamespec(A->getValue()); - - // If the namespec is of the form :filename, search for that file. - bool FilenameSearch = LibclcNamespec.consume_front(":"); - SmallString<128> LibclcTargetFile(LibclcNamespec); + if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) { + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile)); + } else { + // Search the library paths for the file + if (!FilenameSearch) + LibclcTargetFile += ".bc"; - if (FilenameSearch && llvm::sys::fs::exists(LibclcTargetFile)) { - FoundBCLibrary = true; + llvm::sys::path::append(LibclcPath, LibclcTargetFile); + if (llvm::sys::fs::exists(LibclcPath)) { CC1Args.push_back("-mlink-builtin-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(LibclcTargetFile)); + CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath)); } else { - // Search the library paths for the file - if (!FilenameSearch) - LibclcTargetFile += ".bc"; - - for (StringRef LibraryPath : LibraryPaths) { - SmallString<128> LibclcPath(LibraryPath); - llvm::sys::path::append(LibclcPath, LibclcTargetFile); - if (llvm::sys::fs::exists(LibclcPath)) { - FoundBCLibrary = true; - CC1Args.push_back("-mlink-builtin-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(LibclcPath)); - break; - } - } - } - - // Since the user requested a library, if we haven't one then report an - // error. - if (!FoundBCLibrary) + // Since the user requested a library, if we haven't one then report an + // error. D.Diag(diag::err_drv_libclc_not_found) << LibclcTargetFile; + } } } diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl index b7780d1af8d40..80c5d10550146 100644 --- a/clang/test/Driver/opencl-libclc.cl +++ b/clang/test/Driver/opencl-libclc.cl @@ -1,10 +1,5 @@ // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s -// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s -// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s - -// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=libclc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR -// RUN: env LIBRARY_PATH=%S/Inputs/libclc/subdir %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR -// RUN: env LIBRARY_PATH=%S/Inputs/libclc %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR // CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc // CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc >From 1c16dbbd8225cb481bfd9a00fa61493a03478e3f Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 7 Jul 2025 12:29:04 +0100 Subject: [PATCH 7/9] fix diagnostic; add error test --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 +-- clang/test/Driver/opencl-libclc.cl | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 019161c22a24f..8d07ade73ec89 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -392,8 +392,7 @@ def warn_drv_fraw_string_literals_in_cxx11 : Warning< "ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">, InGroup<UnusedCommandLineArgument>; -def err_drv_libclc_not_found : Error< - "no libclc library '%0' found in the clang resource directory or in LIBRARY_PATH">; +def err_drv_libclc_not_found : Error<"no libclc library '%0' found in the clang resource directory">; def err_drv_invalid_malign_branch_EQ : Error< "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; diff --git a/clang/test/Driver/opencl-libclc.cl b/clang/test/Driver/opencl-libclc.cl index 80c5d10550146..185690768c75b 100644 --- a/clang/test/Driver/opencl-libclc.cl +++ b/clang/test/Driver/opencl-libclc.cl @@ -1,5 +1,9 @@ // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/libclc.bc %s 2>&1 | FileCheck %s // RUN: %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/subdir/libclc.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-SUBDIR +// RUN: not %clang -### -target amdgcn-amd-amdhsa --no-offloadlib --libclc-lib=:%S/Inputs/libclc/subdir/not-here.bc %s 2>&1 | FileCheck %s --check-prefix CHECK-ERROR + // CHECK: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}libclc.bc // CHECK-SUBDIR: -mlink-builtin-bitcode{{.*}}Inputs{{/|\\\\}}libclc{{/|\\\\}}subdir{{/|\\\\}}libclc.bc + +// CHECK-ERROR: no libclc library{{.*}}not-here.bc' found in the clang resource directory >From b83c111bba9bc4531a941170018060fdbf7de6d6 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Thu, 4 Apr 2024 17:49:13 +0100 Subject: [PATCH 8/9] [libclc] Add initial LIT tests These tests aren't very meaningful and aren't immune to false positives, but they do get the project building when running 'check-all' and so enable libclc testing in CI. --- libclc/CMakeLists.txt | 4 + libclc/test/CMakeLists.txt | 35 +++ libclc/test/add_sat.cl | 11 - libclc/test/as_type.cl | 11 - libclc/test/convert.cl | 11 - libclc/test/cos.cl | 11 - libclc/test/cross.cl | 11 - libclc/test/fabs.cl | 11 - libclc/test/geometric/cross.cl | 51 +++++ libclc/test/get_group_id.cl | 11 - libclc/test/integer/add_sat.cl | 32 +++ libclc/test/integer/sub_sat.cl | 72 +++++++ libclc/test/lit.cfg.py | 44 ++++ libclc/test/lit.site.cfg.py.in | 23 ++ libclc/test/math/cos.cl | 296 ++++++++++++++++++++++++++ libclc/test/math/fabs.cl | 32 +++ libclc/test/math/rsqrt.cl | 48 +++++ libclc/test/misc/as_type.cl | 31 +++ libclc/test/misc/convert.cl | 32 +++ libclc/test/rsqrt.cl | 14 -- libclc/test/subsat.cl | 27 --- libclc/test/work-item/get_group_id.cl | 33 +++ 22 files changed, 733 insertions(+), 118 deletions(-) create mode 100644 libclc/test/CMakeLists.txt delete mode 100644 libclc/test/add_sat.cl delete mode 100644 libclc/test/as_type.cl delete mode 100644 libclc/test/convert.cl delete mode 100644 libclc/test/cos.cl delete mode 100644 libclc/test/cross.cl delete mode 100644 libclc/test/fabs.cl create mode 100644 libclc/test/geometric/cross.cl delete mode 100644 libclc/test/get_group_id.cl create mode 100644 libclc/test/integer/add_sat.cl create mode 100644 libclc/test/integer/sub_sat.cl create mode 100644 libclc/test/lit.cfg.py create mode 100644 libclc/test/lit.site.cfg.py.in create mode 100644 libclc/test/math/cos.cl create mode 100644 libclc/test/math/fabs.cl create mode 100644 libclc/test/math/rsqrt.cl create mode 100644 libclc/test/misc/as_type.cl create mode 100644 libclc/test/misc/convert.cl delete mode 100644 libclc/test/rsqrt.cl delete mode 100644 libclc/test/subsat.cl create mode 100644 libclc/test/work-item/get_group_id.cl diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 8bc3a75739fcd..673309c88e2cd 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -494,3 +494,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) ) endforeach( d ) endforeach( t ) + +if( NOT LIBCLC_STANDALONE_BUILD ) + add_subdirectory( test ) +endif() diff --git a/libclc/test/CMakeLists.txt b/libclc/test/CMakeLists.txt new file mode 100644 index 0000000000000..653b39c2821a7 --- /dev/null +++ b/libclc/test/CMakeLists.txt @@ -0,0 +1,35 @@ +set( LIBCLC_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) + +set( LIBCLC_TEST_TARGETS_ALL + amdgcn-mesa-mesa3d +) + +foreach( target IN LISTS LIBCLC_TEST_TARGETS_ALL ) + # If we haven't built this libclc target, don't build the tests + if( NOT TARGET prepare-${target} ) + message( WARNING "libclc tests require target ${target}. Tests will not be built" ) + # Add a dummy target + add_custom_target( check-libclc ) + return() + endif() + + list( APPEND LIBCLC_TEST_DEPS prepare-${target} ) +endforeach() + +list( APPEND LIBCLC_TEST_DEPS + ${clang_target} + FileCheck +) + +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) + +add_lit_testsuite( check-libclc + "Running libclc regression tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${LIBCLC_TEST_DEPS} +) diff --git a/libclc/test/add_sat.cl b/libclc/test/add_sat.cl deleted file mode 100644 index 87c3d39df3542..0000000000000 --- a/libclc/test/add_sat.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(__global char *a, __global char *b, __global char *c) { - *a = add_sat(*b, *c); -} diff --git a/libclc/test/as_type.cl b/libclc/test/as_type.cl deleted file mode 100644 index a926f48c4ea0c..0000000000000 --- a/libclc/test/as_type.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(int4 *x, float4 *y) { - *x = as_int4(*y); -} diff --git a/libclc/test/convert.cl b/libclc/test/convert.cl deleted file mode 100644 index 8eba608dc5f8c..0000000000000 --- a/libclc/test/convert.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(int4 *x, float4 *y) { - *x = convert_int4(*y); -} diff --git a/libclc/test/cos.cl b/libclc/test/cos.cl deleted file mode 100644 index 92a998b3ba5f7..0000000000000 --- a/libclc/test/cos.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(float4 *f) { - *f = cos(*f); -} diff --git a/libclc/test/cross.cl b/libclc/test/cross.cl deleted file mode 100644 index 90762d0d073a6..0000000000000 --- a/libclc/test/cross.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(float4 *f) { - *f = cross(f[0], f[1]); -} diff --git a/libclc/test/fabs.cl b/libclc/test/fabs.cl deleted file mode 100644 index 3f5a964e0418a..0000000000000 --- a/libclc/test/fabs.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(float *f) { - *f = fabs(*f); -} diff --git a/libclc/test/geometric/cross.cl b/libclc/test/geometric/cross.cl new file mode 100644 index 0000000000000..4cb8c53bea5ee --- /dev/null +++ b/libclc/test/geometric/cross.cl @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[F]], i64 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 +// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +// CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = fneg float [[TMP5]] +// CHECK-NEXT: [[NEG_I_I:%.*]] = fmul float [[TMP4]], [[TMP6]] +// CHECK-NEXT: [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP2]], float [[TMP3]], float [[NEG_I_I]]) +// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP10:%.*]] = fneg float [[TMP3]] +// CHECK-NEXT: [[NEG3_I_I:%.*]] = fmul float [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP4]], float [[TMP8]], float [[NEG3_I_I]]) +// CHECK-NEXT: [[TMP12:%.*]] = fneg float [[TMP8]] +// CHECK-NEXT: [[NEG6_I_I:%.*]] = fmul float [[TMP2]], [[TMP12]] +// CHECK-NEXT: [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP5]], float [[NEG6_I_I]]) +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0 +// CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 1 +// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP13]], i64 2 +// CHECK-NEXT: store <4 x float> [[VECINIT8_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global float4 *f) { + *f = cross(f[0], f[1]); +} +//. +// CHECK: [[META6]] = !{i32 1} +// CHECK: [[META7]] = !{!"none"} +// CHECK: [[META8]] = !{!"float4*"} +// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*"} +// CHECK: [[META10]] = !{!""} +// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/libclc/test/get_group_id.cl b/libclc/test/get_group_id.cl deleted file mode 100644 index c2349a0076889..0000000000000 --- a/libclc/test/get_group_id.cl +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void foo(int *i) { - i[get_group_id(0)] = 1; -} diff --git a/libclc/test/integer/add_sat.cl b/libclc/test/integer/add_sat.cl new file mode 100644 index 0000000000000..ef5bf77b67d21 --- /dev/null +++ b/libclc/test/integer/add_sat.cl @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(1) [[C]], align 1, !tbaa [[TBAA10]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]]) +// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global char *a, __global char *b, __global char *c) { + *a = add_sat(*b, *c); +} +//. +// CHECK: [[META6]] = !{i32 1, i32 1, i32 1} +// CHECK: [[META7]] = !{!"none", !"none", !"none"} +// CHECK: [[META8]] = !{!"char*", !"char*", !"char*"} +// CHECK: [[META9]] = !{!"", !"", !""} +// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/libclc/test/integer/sub_sat.cl b/libclc/test/integer/sub_sat.cl new file mode 100644 index 0000000000000..7c3f0a3aa306f --- /dev/null +++ b/libclc/test/integer/sub_sat.cl @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_char( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]]) +// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void test_subsat_char(__global char *a, char x, char y) { + *a = sub_sat(x, y); + return; +} + +// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_uchar( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.usub.sat.i8(i8 [[X]], i8 [[Y]]) +// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]] +// CHECK-NEXT: ret void +// +__kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) { + *a = sub_sat(x, y); + return; +} + +// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_long( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]]) +// CHECK-NEXT: store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void test_subsat_long(__global long *a, long x, long y) { + *a = sub_sat(x, y); + return; +} + +// CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_ulong( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.usub.sat.i64(i64 [[X]], i64 [[Y]]) +// CHECK-NEXT: store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: ret void +// +__kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) { + *a = sub_sat(x, y); + return; +} +//. +// CHECK: [[META6]] = !{i32 1, i32 0, i32 0} +// CHECK: [[META7]] = !{!"none", !"none", !"none"} +// CHECK: [[META8]] = !{!"char*", !"char", !"char"} +// CHECK: [[META9]] = !{!"", !"", !""} +// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META13]] = !{!"uchar*", !"uchar", !"uchar"} +// CHECK: [[META14]] = !{!"long*", !"long", !"long"} +// CHECK: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK: [[META16]] = !{!"long", [[META11]], i64 0} +// CHECK: [[META17]] = !{!"ulong*", !"ulong", !"ulong"} +//. diff --git a/libclc/test/lit.cfg.py b/libclc/test/lit.cfg.py new file mode 100644 index 0000000000000..03532dad12d2a --- /dev/null +++ b/libclc/test/lit.cfg.py @@ -0,0 +1,44 @@ +import os + +import lit.formats +import lit.util + +from lit.llvm import llvm_config +import site + +# Configuration file for the 'lit' test runner. + +# name: The name of this test suite. +config.name = "libclc" + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = [ + ".cl", +] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.join(os.path.dirname(__file__)) + +# test_exec_root: The root path where tests should be run. +config.test_exec_root = os.path.join(config.test_run_dir, "test") + +llvm_config.use_default_substitutions() + +llvm_config.use_clang() + +tools = [] +tool_dirs = [config.llvm_tools_dir] + +llvm_config.add_tool_substitutions(tools, tool_dirs) + +# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. +# See https://github.com/llvm/llvm-project/issues/106636 for more details. +# +# We prefer the lit internal shell which provides a better user experience on failures +# unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 env var. +use_lit_shell = True +lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL") +if lit_shell_env: + use_lit_shell = lit.util.pythonize_bool(lit_shell_env) + +config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell) diff --git a/libclc/test/lit.site.cfg.py.in b/libclc/test/lit.site.cfg.py.in new file mode 100644 index 0000000000000..a8618cf30c4fc --- /dev/null +++ b/libclc/test/lit.site.cfg.py.in @@ -0,0 +1,23 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.llvm_src_root = path(r"@LLVM_SOURCE_DIR@") +config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@") +config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@")) +config.llvm_libs_dir = lit_config.substitute(path(r"@LLVM_LIBS_DIR@")) +config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@")) +config.lit_tools_dir = path(r"@LLVM_LIT_TOOLS_DIR@") +config.host_triple = "@LLVM_HOST_TRIPLE@" +config.target_triple = "@LLVM_TARGET_TRIPLE@" +config.host_arch = "@HOST_ARCH@" +config.python_executable = "@Python3_EXECUTABLE@" +config.libclc_src_dir = path(r"@LIBCLC_SOURCE_DIR@") +config.test_run_dir = path(r"@LIBCLC_BINARY_DIR@") + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config do the real work. +lit_config.load_config( + config, os.path.join(config.libclc_src_dir, "test/lit.cfg.py")) diff --git a/libclc/test/math/cos.cl b/libclc/test/math/cos.cl new file mode 100644 index 0000000000000..79272cce4d54d --- /dev/null +++ b/libclc/test/math/cos.cl @@ -0,0 +1,296 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp olt <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x4160000000000000) +// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float 0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01)) +// CHECK-NEXT: [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]]) +// CHECK-NEXT: [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000) +// CHECK-NEXT: [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I30_I_I_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I31_I_I_I_I]]) +// CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[MUL_I30_I_I_I_I]] +// CHECK-NEXT: [[SUB2_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[SUB_I_I_I_I]] +// CHECK-NEXT: [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], [[MUL_I30_I_I_I_I]] +// CHECK-NEXT: [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]], [[TMP2]] +// CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd <4 x float> [[SUB_I_I_I_I]], [[SUB4_I_I_I_I]] +// CHECK-NEXT: [[MUL_I27_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000) +// CHECK-NEXT: [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I27_I_I_I_I]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]]) +// CHECK-NEXT: [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[MUL_I27_I_I_I_I]] +// CHECK-NEXT: [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[SUB5_I_I_I_I]] +// CHECK-NEXT: [[SUB7_I_I_I_I:%.*]] = fsub <4 x float> [[SUB6_I_I_I_I]], [[MUL_I27_I_I_I_I]] +// CHECK-NEXT: [[SUB8_I_I_I_I:%.*]] = fsub <4 x float> [[SUB7_I_I_I_I]], [[TMP3]] +// CHECK-NEXT: [[ADD9_I_I_I_I:%.*]] = fadd <4 x float> [[SUB5_I_I_I_I]], [[SUB8_I_I_I_I]] +// CHECK-NEXT: [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3CF8469880000000) +// CHECK-NEXT: [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I_I_I_I_I]] +// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]]) +// CHECK-NEXT: [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[MUL_I_I_I_I_I]] +// CHECK-NEXT: [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[SUB10_I_I_I_I]] +// CHECK-NEXT: [[SUB12_I_I_I_I:%.*]] = fsub <4 x float> [[SUB11_I_I_I_I]], [[MUL_I_I_I_I_I]] +// CHECK-NEXT: [[ADD13_I_I_I_I:%.*]] = fadd <4 x float> [[SUB10_I_I_I_I]], [[SUB12_I_I_I_I]] +// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]] +// CHECK-NEXT: [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to <4 x i32> +// CHECK-NEXT: [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[SHR_I_I_I:%.*]] = lshr <4 x i32> [[ASTYPE_I_I_I]], splat (i32 23) +// CHECK-NEXT: [[AND_I11_I_I:%.*]] = and <4 x i32> [[ASTYPE_I_I_I]], splat (i32 8388607) +// CHECK-NEXT: [[OR_I_I_I:%.*]] = or disjoint <4 x i32> [[AND_I11_I_I]], splat (i32 8388608) +// CHECK-NEXT: [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -28220501) +// CHECK-NEXT: [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] to <4 x i64> +// CHECK-NEXT: [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4266746795) +// CHECK-NEXT: [[SHR_I29_I_I:%.*]] = lshr <4 x i64> [[MUL_I28_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I29_I_I]] to <4 x i32> +// CHECK-NEXT: [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1011060801) +// CHECK-NEXT: [[ADD_I_I_I:%.*]] = add <4 x i32> [[MUL2_I_I_I]], [[CONV_I2_I30_I_I]] +// CHECK-NEXT: [[MUL_I24_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1011060801) +// CHECK-NEXT: [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I25_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD_I_I_I]], [[CONV_I2_I30_I_I]] +// CHECK-NEXT: [[SEXT_I_I1_I:%.*]] = zext <4 x i1> [[CMP_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[ADD5_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT_I_I1_I]], [[CONV_I2_I26_I_I]] +// CHECK-NEXT: [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -614296167) +// CHECK-NEXT: [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]], [[MUL6_I_I_I]] +// CHECK-NEXT: [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 3680671129) +// CHECK-NEXT: [[SHR_I21_I_I:%.*]] = lshr <4 x i64> [[MUL_I20_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I21_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]], [[ADD5_I_I_I]] +// CHECK-NEXT: [[SEXT10_I_I_I:%.*]] = zext <4 x i1> [[CMP9_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[ADD13_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]] +// CHECK-NEXT: [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -181084736) +// CHECK-NEXT: [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]], [[MUL14_I_I_I]] +// CHECK-NEXT: [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4113882560) +// CHECK-NEXT: [[SHR_I17_I_I:%.*]] = lshr <4 x i64> [[MUL_I16_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I17_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]], [[ADD13_I_I_I]] +// CHECK-NEXT: [[SEXT18_I_I_I:%.*]] = zext <4 x i1> [[CMP17_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[ADD21_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]] +// CHECK-NEXT: [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -64530479) +// CHECK-NEXT: [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]], [[MUL22_I_I_I]] +// CHECK-NEXT: [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4230436817) +// CHECK-NEXT: [[SHR_I13_I_I:%.*]] = lshr <4 x i64> [[MUL_I12_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I13_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]], [[ADD21_I_I_I]] +// CHECK-NEXT: [[SEXT26_I_I_I:%.*]] = zext <4 x i1> [[CMP25_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[ADD29_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]] +// CHECK-NEXT: [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1313084713) +// CHECK-NEXT: [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]], [[MUL30_I_I_I]] +// CHECK-NEXT: [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1313084713) +// CHECK-NEXT: [[SHR_I9_I_I:%.*]] = lshr <4 x i64> [[MUL_I8_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I9_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]], [[ADD29_I_I_I]] +// CHECK-NEXT: [[SEXT34_I_I_I:%.*]] = zext <4 x i1> [[CMP33_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[ADD37_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]] +// CHECK-NEXT: [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -1560706194) +// CHECK-NEXT: [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]], [[MUL38_I_I_I]] +// CHECK-NEXT: [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 2734261102) +// CHECK-NEXT: [[SHR_I6_I_I:%.*]] = lshr <4 x i64> [[MUL_I5_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I6_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]], [[ADD37_I_I_I]] +// CHECK-NEXT: [[SEXT42_I_I_I:%.*]] = zext <4 x i1> [[CMP41_I_I_I]] to <4 x i32> +// CHECK-NEXT: [[ADD45_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT42_I_I_I]], [[CONV_I2_I_I_I]] +// CHECK-NEXT: [[SUB47_I_I_I:%.*]] = add nsw <4 x i32> [[SHR_I_I_I]], splat (i32 -120) +// CHECK-NEXT: [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]] +// CHECK-NEXT: [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]] +// CHECK-NEXT: [[COND55_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD23_I_I_I]], <4 x i32> [[ADD31_I_I_I]] +// CHECK-NEXT: [[COND57_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]] +// CHECK-NEXT: [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]] +// CHECK-NEXT: [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]] +// CHECK-NEXT: [[COND63_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[MUL_I_I_I]], <4 x i32> [[ADD_I_I_I]] +// CHECK-NEXT: [[DOTNEG_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer +// CHECK-NEXT: [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]], [[SUB47_I_I_I]] +// CHECK-NEXT: [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB66_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[COND70_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND53_I_I_I]], <4 x i32> [[COND51_I_I_I]] +// CHECK-NEXT: [[COND72_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]] +// CHECK-NEXT: [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]] +// CHECK-NEXT: [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]] +// CHECK-NEXT: [[COND78_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND61_I_I_I]], <4 x i32> [[COND59_I_I_I]] +// CHECK-NEXT: [[COND80_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]] +// CHECK-NEXT: [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer +// CHECK-NEXT: [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], [[SUB66_I_I_I]] +// CHECK-NEXT: [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[COND87_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]] +// CHECK-NEXT: [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]] +// CHECK-NEXT: [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]] +// CHECK-NEXT: [[COND93_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND78_I_I_I]], <4 x i32> [[COND76_I_I_I]] +// CHECK-NEXT: [[COND95_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]] +// CHECK-NEXT: [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer +// CHECK-NEXT: [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], [[SUB83_I_I_I]] +// CHECK-NEXT: [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[COND102_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]] +// CHECK-NEXT: [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]] +// CHECK-NEXT: [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]] +// CHECK-NEXT: [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]] +// CHECK-NEXT: [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer +// CHECK-NEXT: [[SUB111_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SUB98_I_I_I]] +// CHECK-NEXT: [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> [[DOTNEG381_I_I_I]], [[SUB111_I_I_I]] +// CHECK-NEXT: [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32> splat (i32 24), [[SHR_I_I_I]] +// CHECK-NEXT: [[SHL_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB47_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHL_I_I_I:%.*]] = shl <4 x i32> [[COND102_I_I_I]], [[SHL_MASK_I_I_I]] +// CHECK-NEXT: [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]], [[SHR_MASK_I_I_I]] +// CHECK-NEXT: [[OR117_I_I_I:%.*]] = or <4 x i32> [[SHL_I_I_I]], [[SHR116_I_I_I]] +// CHECK-NEXT: [[SHL120_I_I_I:%.*]] = shl <4 x i32> [[COND104_I_I_I]], [[SHL_MASK_I_I_I]] +// CHECK-NEXT: [[SHR122_I_I_I:%.*]] = lshr <4 x i32> [[COND106_I_I_I]], [[SHR_MASK_I_I_I]] +// CHECK-NEXT: [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], [[SHR122_I_I_I]] +// CHECK-NEXT: [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]], [[SHL_MASK_I_I_I]] +// CHECK-NEXT: [[SHR128_I_I_I:%.*]] = lshr <4 x i32> [[COND108_I_I_I]], [[SHR_MASK_I_I_I]] +// CHECK-NEXT: [[OR129_I_I_I:%.*]] = or <4 x i32> [[SHL126_I_I_I]], [[SHR128_I_I_I]] +// CHECK-NEXT: [[COND131_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]] +// CHECK-NEXT: [[COND133_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]] +// CHECK-NEXT: [[COND135_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]] +// CHECK-NEXT: [[SHR136_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 29) +// CHECK-NEXT: [[OR139_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x i32> splat (i32 2)) +// CHECK-NEXT: [[OR142_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x i32> splat (i32 2)) +// CHECK-NEXT: [[OR145_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4 x i32> splat (i32 2)) +// CHECK-NEXT: [[AND146_I_I_I:%.*]] = and <4 x i32> [[SHR136_I_I_I]], splat (i32 1) +// CHECK-NEXT: [[SEXT148_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[AND146_I_I_I]] +// CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[SEXT148_I_I_I]], splat (i32 -2147483648) +// CHECK-NEXT: [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], [[SEXT148_I_I_I]] +// CHECK-NEXT: [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]], [[SEXT148_I_I_I]] +// CHECK-NEXT: [[XOR157_I_I_I:%.*]] = xor <4 x i32> [[OR145_I_I_I]], [[SEXT148_I_I_I]] +// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 0 +// CHECK-NEXT: [[TMP7:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP6]], i1 false) +// CHECK-NEXT: [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 +// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 1 +// CHECK-NEXT: [[TMP9:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP8]], i1 false) +// CHECK-NEXT: [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I1_I_I]], i32 [[TMP9]], i64 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 2 +// CHECK-NEXT: [[TMP11:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP10]], i1 false) +// CHECK-NEXT: [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I2_I_I]], i32 [[TMP11]], i64 2 +// CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 3 +// CHECK-NEXT: [[TMP13:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP12]], i1 false) +// CHECK-NEXT: [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I3_I_I]], i32 [[TMP13]], i64 3 +// CHECK-NEXT: [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 1) +// CHECK-NEXT: [[SHL_MASK162_I_I_I:%.*]] = and <4 x i32> [[ADD159_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], [[SHL_MASK162_I_I_I]] +// CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 31) +// CHECK-NEXT: [[SHR165_I_I_I:%.*]] = lshr <4 x i32> [[XOR156_I_I_I]], [[SHR_MASK164_I_I_I]] +// CHECK-NEXT: [[OR166_I_I_I:%.*]] = or <4 x i32> [[SHL163_I_I_I]], [[SHR165_I_I_I]] +// CHECK-NEXT: [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], [[SHL_MASK162_I_I_I]] +// CHECK-NEXT: [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]], [[SHR_MASK164_I_I_I]] +// CHECK-NEXT: [[OR172_I_I_I:%.*]] = or <4 x i32> [[SHL169_I_I_I]], [[SHR171_I_I_I]] +// CHECK-NEXT: [[SHR176_I_I_I:%.*]] = lshr <4 x i32> [[OR166_I_I_I]], splat (i32 9) +// CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 23) +// CHECK-NEXT: [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], [[TMP15]] +// CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 1056964608) +// CHECK-NEXT: [[OR177_I_I_I:%.*]] = or <4 x i32> [[TMP16]], [[TMP5]] +// CHECK-NEXT: [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] to <4 x float> +// CHECK-NEXT: [[OR181_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x i32> splat (i32 23)) +// CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 0 +// CHECK-NEXT: [[TMP18:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP17]], i1 false) +// CHECK-NEXT: [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 +// CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 1 +// CHECK-NEXT: [[TMP20:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP19]], i1 false) +// CHECK-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I_I_I]], i32 [[TMP20]], i64 1 +// CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 2 +// CHECK-NEXT: [[TMP22:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP21]], i1 false) +// CHECK-NEXT: [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2 +// CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 3 +// CHECK-NEXT: [[TMP24:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP23]], i1 false) +// CHECK-NEXT: [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3 +// CHECK-NEXT: [[ADD183_I_I_NEG_I:%.*]] = xor <4 x i32> [[VECINIT6_I_I_I]], splat (i32 -1) +// CHECK-NEXT: [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I_I_I]], splat (i32 1) +// CHECK-NEXT: [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHL187_I_I_I:%.*]] = shl <4 x i32> [[OR181_I_I_I]], [[SHL_MASK186_I_I_I]] +// CHECK-NEXT: [[TMP25:%.*]] = and <4 x i32> [[VECINIT6_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat (i32 31) +// CHECK-NEXT: [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]], [[SHR_MASK189_I_I_I]] +// CHECK-NEXT: [[OR191_I_I_I:%.*]] = or <4 x i32> [[SHL187_I_I_I]], [[SHR190_I_I_I]] +// CHECK-NEXT: [[REASS_SUB10_I:%.*]] = sub nuw nsw <4 x i32> [[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]] +// CHECK-NEXT: [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], splat (i32 23) +// CHECK-NEXT: [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat (i32 9) +// CHECK-NEXT: [[REASS_SUB16_I_I:%.*]] = add <4 x i32> [[ADD193_I_I_NEG_I]], splat (i32 864026624) +// CHECK-NEXT: [[TMP26:%.*]] = or disjoint <4 x i32> [[SHR197_I_I_I]], [[REASS_SUB16_I_I]] +// CHECK-NEXT: [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]] +// CHECK-NEXT: [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] to <4 x float> +// CHECK-NEXT: [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]], splat (float 0x3FF921FB40000000) +// CHECK-NEXT: [[FNEG_I_I_I:%.*]] = fneg <4 x float> [[MUL200_I_I_I]] +// CHECK-NEXT: [[TMP27:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]]) +// CHECK-NEXT: [[TMP28:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[TMP27]]) +// CHECK-NEXT: [[TMP29:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[TMP28]]) +// CHECK-NEXT: [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], [[TMP29]] +// CHECK-NEXT: [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]], [[MUL200_I_I_I]] +// CHECK-NEXT: [[SUB206_I_I_I:%.*]] = fsub <4 x float> [[TMP29]], [[SUB205_I_I_I]] +// CHECK-NEXT: [[SHR207_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 30) +// CHECK-NEXT: [[ADD209_I_I_I:%.*]] = add nuw nsw <4 x i32> [[AND146_I_I_I]], [[SHR207_I_I_I]] +// CHECK-NEXT: [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]] +// CHECK-NEXT: [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]] +// CHECK-NEXT: [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[CONV_I_I_I]], <4 x i32> [[ADD209_I_I_I]] +// CHECK-NEXT: [[COND6_I_I:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 2) +// CHECK-NEXT: [[MUL_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[COND_V_I2_I]] +// CHECK-NEXT: [[MUL1_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[MUL_I_I]] +// CHECK-NEXT: [[TMP30:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0x3DE5D93A60000000), <4 x float> splat (float 0xBE5AE5E680000000)) +// CHECK-NEXT: [[TMP31:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4 x float> splat (float 0x3EC6DBE4A0000000)) +// CHECK-NEXT: [[TMP32:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP31]], <4 x float> splat (float 0xBF2A013A80000000)) +// CHECK-NEXT: [[TMP33:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> splat (float 0x3F811110E0000000)) +// CHECK-NEXT: [[FNEG_I3_I:%.*]] = fneg <4 x float> [[MUL1_I_I]] +// CHECK-NEXT: [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]], [[FNEG_I3_I]] +// CHECK-NEXT: [[TMP34:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[MUL5_I_I]]) +// CHECK-NEXT: [[FNEG7_I_I:%.*]] = fneg <4 x float> [[COND4_V_I_I]] +// CHECK-NEXT: [[TMP35:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP34]], <4 x float> [[FNEG7_I_I]]) +// CHECK-NEXT: [[TMP36:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x float> splat (float 0x3FC5555560000000), <4 x float> [[TMP35]]) +// CHECK-NEXT: [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]] +// CHECK-NEXT: [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]] +// CHECK-NEXT: [[TMP37:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0xBDA8FAE9C0000000), <4 x float> splat (float 0x3E21EE9EC0000000)) +// CHECK-NEXT: [[TMP38:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4 x float> splat (float 0xBE92524740000000)) +// CHECK-NEXT: [[TMP39:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP38]], <4 x float> splat (float 0x3EFA015C40000000)) +// CHECK-NEXT: [[TMP40:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> splat (float 0xBF56C16C00000000)) +// CHECK-NEXT: [[TMP41:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4 x float> splat (float 0x3FA5555560000000)) +// CHECK-NEXT: [[MUL5_I5_I:%.*]] = fmul <4 x float> [[MUL_I_I]], [[TMP41]] +// CHECK-NEXT: [[TMP42:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[COND_V_I2_I]]) +// CHECK-NEXT: [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32> +// CHECK-NEXT: [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -16777216) +// CHECK-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -1050253722) +// CHECK-NEXT: [[AND938_I_I:%.*]] = icmp ult <4 x i32> [[TMP43]], splat (i32 11429479) +// CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[AND938_I_I]], <4 x i32> [[SUB_I6_I]], <4 x i32> zeroinitializer +// CHECK-NEXT: [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]], splat (i32 1061683200) +// CHECK-NEXT: [[COND14_I_I:%.*]] = select <4 x i1> [[CMP11_I_I]], <4 x i32> splat (i32 1049624576), <4 x i32> [[TMP44]] +// CHECK-NEXT: [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x float> +// CHECK-NEXT: [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]] +// CHECK-NEXT: [[TMP46:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[FNEG_I7_I]]) +// CHECK-NEXT: [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 1.000000e+00), [[TMP45]] +// CHECK-NEXT: [[FNEG17_I_I:%.*]] = fneg <4 x float> [[COND_V_I2_I]] +// CHECK-NEXT: [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]], [[FNEG17_I_I]] +// CHECK-NEXT: [[TMP47:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x float> [[MUL18_I_I]]) +// CHECK-NEXT: [[TMP48:%.*]] = fsub <4 x float> [[TMP47]], [[TMP46]] +// CHECK-NEXT: [[SUB21_I_I:%.*]] = fadd <4 x float> [[SUB16_I_I]], [[TMP48]] +// CHECK-NEXT: [[TMP49:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 1) +// CHECK-NEXT: [[TMP50:%.*]] = icmp eq <4 x i32> [[TMP49]], zeroinitializer +// CHECK-NEXT: [[COND_V_I_I:%.*]] = select <4 x i1> [[TMP50]], <4 x float> [[SUB21_I_I]], <4 x float> [[FNEG_I_I]] +// CHECK-NEXT: [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]], zeroinitializer +// CHECK-NEXT: [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]] to <4 x i32> +// CHECK-NEXT: [[SHL_I_I:%.*]] = shl nsw <4 x i32> [[SEXT6_I_I]], splat (i32 31) +// CHECK-NEXT: [[XOR_I_I:%.*]] = xor <4 x i32> [[SHL_I_I]], [[COND_I_I]] +// CHECK-NEXT: [[ASTYPE7_I_I:%.*]] = bitcast <4 x i32> [[XOR_I_I]] to <4 x float> +// CHECK-NEXT: [[TMP51:%.*]] = fcmp ueq <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x7FF0000000000000) +// CHECK-NEXT: [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x float> splat (float 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]] +// CHECK-NEXT: store <4 x float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global float4 *f) { + *f = cos(*f); +} +//. +// CHECK: [[META6]] = !{i32 1} +// CHECK: [[META7]] = !{!"none"} +// CHECK: [[META8]] = !{!"float4*"} +// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*"} +// CHECK: [[META10]] = !{!""} +// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/libclc/test/math/fabs.cl b/libclc/test/math/fabs.cl new file mode 100644 index 0000000000000..5935fc9f62627 --- /dev/null +++ b/libclc/test/math/fabs.cl @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10:![0-9]+]] +// CHECK-NEXT: [[ELT_ABS_I_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[TMP0]]) +// CHECK-NEXT: store float [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global float *f) { + *f = fabs(*f); +} +//. +// CHECK: [[META6]] = !{i32 1} +// CHECK: [[META7]] = !{!"none"} +// CHECK: [[META8]] = !{!"float*"} +// CHECK: [[META9]] = !{!""} +// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK: [[META11]] = !{!"float", [[META12:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/libclc/test/math/rsqrt.cl b/libclc/test/math/rsqrt.cl new file mode 100644 index 0000000000000..fa5e13f2705f2 --- /dev/null +++ b/libclc/test/math/rsqrt.cl @@ -0,0 +1,48 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +#if defined(cl_khr_fp64) + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) initializes((16, 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 captures(none) initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]), !fpmath [[META14:![0-9]+]] +// CHECK-NEXT: [[DIV_I_I:%.*]] = fdiv contract <4 x float> splat (float 1.000000e+00), [[TMP1]], !fpmath [[META15:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[X]], i64 16 +// CHECK-NEXT: store <4 x float> [[DIV_I_I]], ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1) [[Y]], align 32, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call contract <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]]) +// CHECK-NEXT: [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 1.000000e+00), [[TMP3]] +// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[Y]], i64 32 +// CHECK-NEXT: store <4 x double> [[DIV_I_I1]], ptr addrspace(1) [[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global float4 *x, __global double4 *y) { + x[1] = rsqrt(x[0]); + y[1] = rsqrt(y[0]); +} + +#endif +//. +// CHECK: [[META6]] = !{i32 1, i32 1} +// CHECK: [[META7]] = !{!"none", !"none"} +// CHECK: [[META8]] = !{!"float4*", !"double4*"} +// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", !"double __attribute__((ext_vector_type(4)))*"} +// CHECK: [[META10]] = !{!"", !""} +// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META14]] = !{float 3.000000e+00} +// CHECK: [[META15]] = !{float 2.500000e+00} +//. diff --git a/libclc/test/misc/as_type.cl b/libclc/test/misc/as_type.cl new file mode 100644 index 0000000000000..a475956e62d0a --- /dev/null +++ b/libclc/test/misc/as_type.cl @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global int4 *x, __global float4 *y) { + *x = as_int4(*y); +} +//. +// CHECK: [[META6]] = !{i32 1, i32 1} +// CHECK: [[META7]] = !{!"none", !"none"} +// CHECK: [[META8]] = !{!"int4*", !"float4*"} +// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"} +// CHECK: [[META10]] = !{!"", !""} +// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/libclc/test/misc/convert.cl b/libclc/test/misc/convert.cl new file mode 100644 index 0000000000000..cd8c41465ed38 --- /dev/null +++ b/libclc/test/misc/convert.cl @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32> +// CHECK-NEXT: store <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global int4 *x, __global float4 *y) { + *x = convert_int4(*y); +} +//. +// CHECK: [[META6]] = !{i32 1, i32 1} +// CHECK: [[META7]] = !{!"none", !"none"} +// CHECK: [[META8]] = !{!"int4*", !"float4*"} +// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"} +// CHECK: [[META10]] = !{!"", !""} +// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/libclc/test/rsqrt.cl b/libclc/test/rsqrt.cl deleted file mode 100644 index 4eebfe8ecf7f9..0000000000000 --- a/libclc/test/rsqrt.cl +++ /dev/null @@ -1,14 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -__kernel void foo(float4 *x, double4 *y) { - x[1] = rsqrt(x[0]); - y[1] = rsqrt(y[0]); -} diff --git a/libclc/test/subsat.cl b/libclc/test/subsat.cl deleted file mode 100644 index 5e6fbdcfbef9e..0000000000000 --- a/libclc/test/subsat.cl +++ /dev/null @@ -1,27 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -__kernel void test_subsat_char(char *a, char x, char y) { - *a = sub_sat(x, y); - return; -} - -__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) { - *a = sub_sat(x, y); - return; -} - -__kernel void test_subsat_long(long *a, long x, long y) { - *a = sub_sat(x, y); - return; -} - -__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) { - *a = sub_sat(x, y); - return; -} \ No newline at end of file diff --git a/libclc/test/work-item/get_group_id.cl b/libclc/test/work-item/get_group_id.cl new file mode 100644 index 0000000000000..f73f8f76cd519 --- /dev/null +++ b/libclc/test/work-item/get_group_id.cl @@ -0,0 +1,33 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s + +// CHECK-LABEL: define protected amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x() +// CHECK-NEXT: [[RETVAL_0_I:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(1) [[I]], i64 [[RETVAL_0_I]] +// CHECK-NEXT: store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA10:![0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void foo(__global int *i) { + i[get_group_id(0)] = 1; +} +//. +// CHECK: [[META6]] = !{i32 1} +// CHECK: [[META7]] = !{!"none"} +// CHECK: [[META8]] = !{!"int*"} +// CHECK: [[META9]] = !{!""} +// CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK: [[META11]] = !{!"int", [[META12:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} +// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +//. >From e79ba7281bd21e8a145d325a706db8e56d7468dc Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 7 Jul 2025 13:01:49 +0100 Subject: [PATCH 9/9] fix CI? --- .ci/compute_projects.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.ci/compute_projects.py b/.ci/compute_projects.py index c3cf714ce6c10..4c268b7221663 100644 --- a/.ci/compute_projects.py +++ b/.ci/compute_projects.py @@ -46,6 +46,7 @@ "mlir", "polly", "flang", + "libclc", }, "lld": {"bolt", "cross-project-tests"}, # TODO(issues/132795): LLDB should be enabled on clang changes. @@ -75,7 +76,7 @@ # This mapping describes runtimes that should be tested when the key project is # touched. DEPENDENT_RUNTIMES_TO_TEST = { - "clang": {"compiler-rt"}, + "clang": {"compiler-rt", "libclc"}, "clang-tools-extra": {"libc"}, "libc": {"libc"}, ".ci": {"compiler-rt", "libc"}, @@ -132,6 +133,7 @@ "lld": "check-lld", "flang": "check-flang", "libc": "check-libc", + "libclc": "check-libclc", "lld": "check-lld", "lldb": "check-lldb", "mlir": "check-mlir", @@ -139,7 +141,7 @@ "polly": "check-polly", } -RUNTIMES = {"libcxx", "libcxxabi", "libunwind", "compiler-rt", "libc"} +RUNTIMES = {"libcxx", "libcxxabi", "libunwind", "compiler-rt", "libc", "libclc"} def _add_dependencies(projects: Set[str], runtimes: Set[str]) -> Set[str]: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits