[llvm-branch-commits] [llvm] llvm-reduce: Fix losing fast math flags in operands-to-args (PR #133421)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133421 >From f7605bad9a66743461e4ab1e00123615f3fadfa8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Mar 2025 18:00:05 +0700 Subject: [PATCH] llvm-reduce: Fix losing fast math flags in operands-to-args --- .../operands-to-args-preserve-fmf.ll | 20 +++ .../deltas/ReduceOperandsToArgs.cpp | 4 2 files changed, 24 insertions(+) create mode 100644 llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll diff --git a/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll b/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll new file mode 100644 index 0..b4b19ca28dbb5 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll @@ -0,0 +1,20 @@ +; RUN: llvm-reduce %s -o %t --abort-on-invalid-reduction --delta-passes=operands-to-args --test FileCheck --test-arg %s --test-arg --check-prefix=INTERESTING --test-arg --input-file +; RUN: FileCheck %s --input-file %t --check-prefix=REDUCED + +; INTERESTING-LABEL: define float @callee( +; INTERESTING: fadd float +define float @callee(float %a) { + %x = fadd float %a, 1.0 + ret float %x +} + +; INTERESTING-LABEL: define float @caller( +; INTERESTING: load float + +; REDUCED-LABEL: define float @caller(ptr %ptr, float %val, float %callee.ret1) { +; REDUCED: %callee.ret12 = call nnan nsz float @callee(float %val, float 0.00e+00) +define float @caller(ptr %ptr) { + %val = load float, ptr %ptr + %callee.ret = call nnan nsz float @callee(float %val) + ret float %callee.ret +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp index 037ff15fae0f6..e7ad52eb65a5d 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" 
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -107,6 +108,9 @@ static void replaceFunctionCalls(Function *OldF, Function *NewF) { NewCI->setCallingConv(NewF->getCallingConv()); NewCI->setAttributes(CI->getAttributes()); +if (auto *FPOp = dyn_cast<FPMathOperator>(NewCI)) + NewCI->setFastMathFlags(CI->getFastMathFlags()); + // Do the replacement for this use. if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133627 >From 3e646d940a2fece2624de27e223639cd2546b694 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Mar 2025 14:33:36 +0700 Subject: [PATCH] llvm-reduce: Reduce with early return of arguments Extend the instruction -> return reduction with one that inserts return of function arguments. Not sure how useful this really is. This has more freedom since we could insert the return anywhere in the function, but this just inserts the return in the entry block. --- .../reduce-values-to-return-args.ll | 77 +++ ...-values-to-return-nonvoid-noncallee-use.ll | 2 +- .../llvm-reduce/reduce-values-to-return.ll| 2 +- llvm/tools/llvm-reduce/DeltaPasses.def| 5 +- .../deltas/ReduceValuesToReturn.cpp | 42 +- .../llvm-reduce/deltas/ReduceValuesToReturn.h | 3 +- 6 files changed, 124 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll new file mode 100644 index 0..abbc643822033 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll @@ -0,0 +1,77 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=arguments-to-return --test FileCheck --test-arg --check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefixes=RESULT %s < %t + + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret void +} + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 
@move_entry_block_use_argument_to_return_existing_ret( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret i32 0 +} + +; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return( +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg +define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +entry: + br i1 %cond0, label %bb0, label %bb1 + +bb0: + %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ] + store i32 %arg, ptr %ptr0 + store i32 %phi, ptr %ptr1 + br label %bb1 + +bb1: + br i1 %cond1, label %bb0, label %bb2 + +bb2: + ret void +} + +; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) { +; INTERESTING: %arg1 + +; RESULT-LABEL: define ptr @keep_second_arg( +; RESULT-NEXT: ret ptr %arg1 +; RESULT-NEXT: } +define void @keep_second_arg(i32 %arg0, ptr %arg1) { + store i32 %arg0, ptr %arg1 + ret void +} + +; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; INTERESTING: i32 %arg2 + +; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg2 +define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +entry: + br i1 %arg0, label %bb0, label %bb1 + +bb0: + store i32 %arg2, ptr %arg1 + ret void + +bb1: + ret void +} diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll index 215ea97a8be91..11166479318c6 100644 --- a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll @@ -1,7 +1,7 @@ ; Make sure we don't break on 
non-callee uses of funtions with a ; non-void return type. -; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=values-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t ; RUN: FileCheck --check-prefix=RESULT %s < %t ; INTERESTING-LABEL: @interesting( diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll index 0c36db8ebc278..2af87aad05169 100644 --- a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll @@ -1,7 +1,7 @@ ; Test that llvm-reduce can move intermediate values by inserting ; ear
[llvm-branch-commits] [libcxx] [libc++] Guard additional headers with _LIBCPP_HAS_LOCALIZATION (#131921) (PR #134406)
https://github.com/ldionne updated https://github.com/llvm/llvm-project/pull/134406 >From abb3af9e0f26cae7583efe2591fe870da7b87fa7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 4 Apr 2025 11:48:46 -0400 Subject: [PATCH] [libc++] Guard additional headers with _LIBCPP_HAS_LOCALIZATION (#131921) There were some remaining headers that were not guarded with _LIBCPP_HAS_LOCALIZATION, leading to errors when trying to use modules on platforms that don't support localization (since all the headers get pulled in when building the 'std' module). This patch brings these headers in line with what we do for every other header that depends on localization. This patch also requires including <picolibc.h> from <__configuration/platform.h> in order to define _NEWLIB_VERSION. In the long term, we should use a better approach for doing that, such as defining a macro in the __config_site header. (cherry picked from commit 4090910a695efcba4b484e9f8ad2b564e9a4e7ed) --- libcxx/include/__configuration/platform.h | 7 + libcxx/include/__locale | 153 - libcxx/include/__locale_dir/locale_base_api.h | 112 +++ libcxx/include/fstream| 55 ++-- libcxx/include/regex | 290 +- libcxx/include/strstream | 55 ++-- .../configs/armv7m-picolibc-libc++.cfg.in | 4 - .../test/libcxx/system_reserved_names.gen.py | 6 + 8 files changed, 356 insertions(+), 326 deletions(-) diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index 8d0f8f63f5213..f3c199dee172b 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -42,6 +42,13 @@ # endif #endif +// This is required in order for _NEWLIB_VERSION to be defined in places where we use it. +// TODO: We shouldn't be including arbitrarily-named headers from libc++ since this can break valid +// user code. Move code paths that need _NEWLIB_VERSION to another customization mechanism. 
+#if __has_include() +# include +#endif + #ifndef __BYTE_ORDER__ # error \ "Your compiler doesn't seem to define __BYTE_ORDER__, which is required by libc++ to know the endianness of your target platform" diff --git a/libcxx/include/__locale b/libcxx/include/__locale index dfe79d5e506f1..93187dc1d0d9c 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -11,6 +11,9 @@ #define _LIBCPP___LOCALE #include <__config> + +#if _LIBCPP_HAS_LOCALIZATION + #include <__locale_dir/locale_base_api.h> #include <__memory/shared_count.h> #include <__mutex/once_flag.h> @@ -24,18 +27,18 @@ #include // Some platforms require more includes than others. Keep the includes on all plaforms for now. -#include -#include +# include +# include -#if _LIBCPP_HAS_WIDE_CHARACTERS -# include -#else -# include <__std_mbstate_t.h> -#endif +# if _LIBCPP_HAS_WIDE_CHARACTERS +#include +# else +#include <__std_mbstate_t.h> +# endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -85,9 +88,9 @@ public: // locale operations: string name() const; bool operator==(const locale&) const; -#if _LIBCPP_STD_VER <= 17 +# if _LIBCPP_STD_VER <= 17 _LIBCPP_HIDE_FROM_ABI bool operator!=(const locale& __y) const { return !(*this == __y); } -#endif +# endif template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool operator()(const basic_string<_CharT, _Traits, _Allocator>&, const basic_string<_CharT, _Traits, _Allocator>&) const; @@ -237,9 +240,9 @@ long collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) cons } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#endif +# endif // template class collate_byname; @@ -264,7 +267,7 @@ protected: string_type do_transform(const 
char_type* __lo, const char_type* __hi) const override; }; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI collate_byname : public collate { __locale::__locale_t __l_; @@ -283,7 +286,7 @@ protected: const char_type* __lo1, const char_type* __hi1, const char_type* __lo2, const char_type* __hi2) const override; string_type do_transform(const char_type* __lo, const char_type* __hi) const override; }; -#endif +# endif template bool locale::operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, @@ -296,7 +299,7 @@ bool locale::operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, class _LIBCPP_EXPORTED_
[llvm-branch-commits] [llvm] llvm-reduce: Fix overly conservative operands-to-args user restriction (PR #133854)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/133854 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 8392666 - Revert "[cmake] Refactor clang unittest cmake (#133545)"
Author: dpalermo Date: 2025-04-01T22:15:57-05:00 New Revision: 8392666bb92c87127db2ecbb85523bd64bfabcd7 URL: https://github.com/llvm/llvm-project/commit/8392666bb92c87127db2ecbb85523bd64bfabcd7 DIFF: https://github.com/llvm/llvm-project/commit/8392666bb92c87127db2ecbb85523bd64bfabcd7.diff LOG: Revert "[cmake] Refactor clang unittest cmake (#133545)" This reverts commit 5ffd9bdb50b5753bbf668e4eab3647dfb46cd0d6. Added: Modified: clang/unittests/AST/ByteCode/CMakeLists.txt clang/unittests/AST/CMakeLists.txt clang/unittests/ASTMatchers/CMakeLists.txt clang/unittests/ASTMatchers/Dynamic/CMakeLists.txt clang/unittests/Analysis/CMakeLists.txt clang/unittests/Analysis/FlowSensitive/CMakeLists.txt clang/unittests/Basic/CMakeLists.txt clang/unittests/CMakeLists.txt clang/unittests/CodeGen/CMakeLists.txt clang/unittests/CrossTU/CMakeLists.txt clang/unittests/DirectoryWatcher/CMakeLists.txt clang/unittests/Driver/CMakeLists.txt clang/unittests/Format/CMakeLists.txt clang/unittests/Frontend/CMakeLists.txt clang/unittests/Index/CMakeLists.txt clang/unittests/InstallAPI/CMakeLists.txt clang/unittests/Interpreter/CMakeLists.txt clang/unittests/Interpreter/ExceptionTests/CMakeLists.txt clang/unittests/Lex/CMakeLists.txt clang/unittests/Rewrite/CMakeLists.txt clang/unittests/Sema/CMakeLists.txt clang/unittests/Serialization/CMakeLists.txt clang/unittests/StaticAnalyzer/CMakeLists.txt clang/unittests/Support/CMakeLists.txt clang/unittests/Tooling/CMakeLists.txt clang/unittests/Tooling/Syntax/CMakeLists.txt clang/unittests/libclang/CMakeLists.txt clang/unittests/libclang/CrashTests/CMakeLists.txt Removed: diff --git a/clang/unittests/AST/ByteCode/CMakeLists.txt b/clang/unittests/AST/ByteCode/CMakeLists.txt index 7ccadda2eeb26..b862fb4834fbd 100644 --- a/clang/unittests/AST/ByteCode/CMakeLists.txt +++ b/clang/unittests/AST/ByteCode/CMakeLists.txt @@ -2,13 +2,19 @@ add_clang_unittest(InterpTests BitcastBuffer.cpp Descriptor.cpp toAPValue.cpp - CLANG_LIBS + ) + 
+clang_target_link_libraries(InterpTests + PRIVATE clangAST clangASTMatchers clangBasic clangFrontend clangSerialization clangTooling - LINK_LIBS - clangTesting ) + + target_link_libraries(InterpTests + PRIVATE + clangTesting +) diff --git a/clang/unittests/AST/CMakeLists.txt b/clang/unittests/AST/CMakeLists.txt index f27d34e8a0719..bfa6082a6ffa4 100644 --- a/clang/unittests/AST/CMakeLists.txt +++ b/clang/unittests/AST/CMakeLists.txt @@ -1,3 +1,10 @@ +set(LLVM_LINK_COMPONENTS + FrontendOpenMP + Support + TargetParser + ) + + add_subdirectory(ByteCode) add_clang_unittest(ASTTests @@ -36,7 +43,10 @@ add_clang_unittest(ASTTests TemplateNameTest.cpp TypePrinterTest.cpp UnresolvedSetTest.cpp - CLANG_LIBS + ) + +clang_target_link_libraries(ASTTests + PRIVATE clangAST clangASTMatchers clangBasic @@ -44,12 +54,11 @@ add_clang_unittest(ASTTests clangLex clangSerialization clangTooling - LINK_LIBS + ) + +target_link_libraries(ASTTests + PRIVATE clangTesting LLVMTestingAnnotations LLVMTestingSupport - LLVM_COMPONENTS - FrontendOpenMP - Support - TargetParser - ) +) diff --git a/clang/unittests/ASTMatchers/CMakeLists.txt b/clang/unittests/ASTMatchers/CMakeLists.txt index 47bd5c108bb5a..6a1e629d81b65 100644 --- a/clang/unittests/ASTMatchers/CMakeLists.txt +++ b/clang/unittests/ASTMatchers/CMakeLists.txt @@ -1,23 +1,31 @@ +set(LLVM_LINK_COMPONENTS + FrontendOpenMP + Support + TargetParser + ) + add_clang_unittest(ASTMatchersTests ASTMatchersInternalTest.cpp ASTMatchersNodeTest.cpp ASTMatchersNarrowingTest.cpp ASTMatchersTraversalTest.cpp GtestMatchersTest.cpp - CLANG_LIBS + ) + +clang_target_link_libraries(ASTMatchersTests + PRIVATE clangAST clangASTMatchers clangBasic clangFrontend clangSerialization clangTooling - LINK_LIBS + ) + +target_link_libraries(ASTMatchersTests + PRIVATE clangTesting LLVMTestingSupport - LLVM_COMPONENTS - FrontendOpenMP - Support - TargetParser - ) +) add_subdirectory(Dynamic) diff --git a/clang/unittests/ASTMatchers/Dynamic/CMakeLists.txt 
b/clang/unittests/ASTMatchers/Dynamic/CMakeLists.txt index b6db7ce62afe7..6d0e12bcb0759 100644 --- a/clang/unittests/ASTMatchers/Dynamic/CMakeLists.txt +++ b/clang/unittests/ASTMatchers/Dynamic/CMakeLists.txt @@ -1,8 +1,16 @@ +set(LLVM_LINK_COMPONENTS + FrontendOpenMP + Support + ) + add_clang_unittest(DynamicASTMatchersTests VariantValueTest.cpp ParserTest.cpp RegistryTest.cpp - CLANG_LIBS + ) + +clang_target_link_libraries(DynamicASTMatchersTests + PRIVATE clangAST clangASTMatchers clangBasic @@ -10,9 +18,9 @@ add_clang_unittest(DynamicASTMatchersTests clangFrontend clangSerialization clangTooling - LINK_LIBS + ) +
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -489,22 +489,61 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}}); addRulesForGOpcs({G_ANYEXT}) + .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away - .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}); + .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away + .Any({{{DivS16, S1}}, {{Vgpr16}, {Vcc}, VccExtToSel}}) + .Any({{{DivS32, S1}}, {{Vgpr32}, {Vcc}, VccExtToSel}}) + .Any({{{DivS64, S1}}, {{Vgpr64}, {Vcc}, VccExtToSel}}) + .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}}) Pierre-vh wrote: unrelated to the patch: These should be better documented, otherwise it's very hard to read what's actually happening here. I had to go find 2 different struct signatures before getting an idea of what these lines do. A small comment on top `RegBankLegalizeRules` that explains how many braces are needed and how the arguments are laid out could go a long way. I also feel like we could eliminate one or even two sets of braces by just making them arguments, further helping readability. It could just be an overload that's preferred when manually writing the rules, and keep the current signature if we're pushing rules using a loop or something? https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Fix deployment targets that were incorrectly bumped (#134278) (PR #134435)
llvmbot wrote: @llvm/pr-subscribers-libcxx Author: None (llvmbot) Changes Backport a97f73405f8e074263a0ed2dd2b8c87c014f46d9 Requested by: @ldionne --- Full diff: https://github.com/llvm/llvm-project/pull/134435.diff 1 Files Affected: - (modified) libcxx/include/__configuration/availability.h (+8-8) ``diff diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h index 261cf9c1ae9d8..f9e52a690c05c 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -163,10 +163,10 @@ __attribute__((availability(driverkit, strict, introduced = 23.0))) // LLVM 15 -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130400) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160500) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160500) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90500) ||\ +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130300) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160300) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160300) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90300) ||\ (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 70500) || \ (defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 220400) #define _LIBCPP_INTRODUCED_IN_LLVM_15 0 @@ -174,10 +174,10 @@ #define _LIBCPP_INTRODUCED_IN_LLVM_15 1 # endif # define 
_LIBCPP_INTRODUCED_IN_LLVM_15_ATTRIBUTE \ -__attribute__((availability(macos, strict, introduced = 13.4))) \ -__attribute__((availability(ios, strict, introduced = 16.5))) \ -__attribute__((availability(tvos, strict, introduced = 16.5))) \ -__attribute__((availability(watchos, strict, introduced = 9.5))) \ +__attribute__((availability(macos, strict, introduced = 13.3))) \ +__attribute__((availability(ios, strict, introduced = 16.3))) \ +__attribute__((availability(tvos, strict, introduced = 16.3))) \ +__attribute__((availability(watchos, strict, introduced = 9.3))) \ __attribute__((availability(bridgeos, strict, introduced = 7.5))) \ __attribute__((availability(driverkit, strict, introduced = 22.4))) `` https://github.com/llvm/llvm-project/pull/134435 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SDAG] Introduce inbounds flag for pointer arithmetic (PR #131862)
ritter-x2a wrote: > I think if we add something called "inbounds" to SelectionDAG, it will > inevitably get misused for other purposes, though. Wouldn't we already have any problems that this would introduce with inbounds GEPs in the IR? Assuming we introduce ISD::PTRADD and only give inbounds a meaning for those nodes, the situation should be the same as in the middle end, right? https://github.com/llvm/llvm-project/pull/131862 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctxprof] Use `isInSpecializedModule` as criteria for using contextual profile (PR #134468)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/134468 None >From e935ffab268301aa1b23f41facd9f800c8842a96 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 4 Apr 2025 13:18:32 -0700 Subject: [PATCH] [ctxprof] Use `isInSpecializedModule` as criteria for using contextual profile --- llvm/lib/Transforms/IPO/ModuleInliner.cpp | 6 +++--- llvm/lib/Transforms/Utils/InlineFunction.cpp| 2 +- llvm/test/Analysis/CtxProfAnalysis/handle-select.ll | 8 llvm/test/Analysis/CtxProfAnalysis/inline.ll| 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 480de5fe4b553..844e27590e501 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -171,8 +171,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, << setIsVerbose(); }); } -} else if (CtxProfPromoteAlwaysInline && !CtxProf.contexts().empty() && - CB->isIndirectCall()) { +} else if (CtxProfPromoteAlwaysInline && + CtxProf.isInSpecializedModule() && CB->isIndirectCall()) { CtxProfAnalysis::collectIndirectCallPromotionList(*CB, CtxProf, ICPCandidates); } @@ -260,7 +260,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // iteration because the next iteration may not happen and we may // miss inlining it. // FIXME: enable for ctxprof. 
- if (CtxProf.contexts().empty()) + if (CtxProf.isInSpecializedModule()) if (tryPromoteCall(*ICB)) NewCallee = ICB->getCalledFunction(); } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 131fbe654c11c..5beee1f681b81 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2356,7 +2356,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime, Function *ForwardVarArgsTo) { - if (CtxProf.contexts().empty()) + if (!CtxProf.isInSpecializedModule()) return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, ForwardVarArgsTo); diff --git a/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll b/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll index dfbc5c9e60177..1880672580eb8 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll @@ -6,9 +6,9 @@ ; RUN: split-file %s %t ; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata ; -; RUN: opt -passes=ctx-instr-gen %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=INSTR -; RUN: opt -passes=ctx-instr-gen,module-inline %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=POST-INL -; RUN: opt -passes=ctx-instr-gen,module-inline,ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=FLATTEN +; RUN: opt -passes=ctx-instr-gen %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=INSTR +; RUN: opt -passes=ctx-instr-gen,module-inline %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=POST-INL +; RUN: opt -passes=ctx-instr-gen,module-inline,ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s 
--check-prefix=FLATTEN ; INSTR-LABEL: yes: ; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1) @@ -45,7 +45,7 @@ ; entry count of that BB is 4. ; ![[SELPROF]] = !{!"branch_weights", i32 3, i32 1} -;--- example.ll +;--- 1234.ll define i32 @foo(i32 %t) !guid !0 { %test = icmp slt i32 %t, 0 br i1 %test, label %yes, label %no diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll index 836ec8b2e8a37..a069acee1c943 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll @@ -3,7 +3,7 @@ ; RUN: split-file %s %t ; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata -; RUN: opt -passes='module-inline,print' -ctx-profile-printer-level=everything %t/module.ll -S \ +; RUN: opt -passes='module-inline,print' -ctx-profile-printer-level=everything %t/1000.ll -S \ ; RUN: -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=yaml \ ; RUN: -o - 2> %t/profile-final.yaml | FileCheck %s ; RUN: diff %t/pro
[llvm-branch-commits] [clang] 1237245 - Revert "[Clang] [NFC] Introduce a helper for emitting compatibility diagnosti…"
Author: Sirraide Date: 2025-04-02T08:28:09+02:00 New Revision: 12372457f6284197ba610616c3fdd4ddea937d5c URL: https://github.com/llvm/llvm-project/commit/12372457f6284197ba610616c3fdd4ddea937d5c DIFF: https://github.com/llvm/llvm-project/commit/12372457f6284197ba610616c3fdd4ddea937d5c.diff LOG: Revert "[Clang] [NFC] Introduce a helper for emitting compatibility diagnosti…" This reverts commit 9d06e0879b5600b19cd8cebd98e4f92b5e62400f. Added: Modified: clang/include/clang/Basic/CMakeLists.txt clang/include/clang/Basic/Diagnostic.td clang/include/clang/Basic/DiagnosticAST.h clang/include/clang/Basic/DiagnosticAnalysis.h clang/include/clang/Basic/DiagnosticComment.h clang/include/clang/Basic/DiagnosticCrossTU.h clang/include/clang/Basic/DiagnosticDriver.h clang/include/clang/Basic/DiagnosticFrontend.h clang/include/clang/Basic/DiagnosticIDs.h clang/include/clang/Basic/DiagnosticInstallAPI.h clang/include/clang/Basic/DiagnosticLex.h clang/include/clang/Basic/DiagnosticParse.h clang/include/clang/Basic/DiagnosticParseKinds.td clang/include/clang/Basic/DiagnosticRefactoring.h clang/include/clang/Basic/DiagnosticSema.h clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/DiagnosticSerialization.h clang/include/clang/Parse/Parser.h clang/include/clang/Sema/SemaBase.h clang/lib/Basic/DiagnosticIDs.cpp clang/lib/Parse/ParseDecl.cpp clang/lib/Parse/Parser.cpp clang/lib/Sema/SemaBase.cpp clang/lib/Sema/SemaDecl.cpp clang/lib/Sema/SemaDeclCXX.cpp clang/lib/Sema/SemaExpr.cpp clang/lib/Sema/SemaTemplate.cpp clang/test/Misc/show-diag-options.c clang/utils/TableGen/ClangDiagnosticsEmitter.cpp clang/utils/TableGen/TableGen.cpp clang/utils/TableGen/TableGenBackends.h Removed: diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 6be6d063c20b4..a671d5c764c22 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -8,11 +8,6 @@ macro(clang_diag_gen component) 
-gen-clang-diags-enums -clang-component=${component} SOURCE Diagnostic.td TARGET ClangDiagnostic${component}Enums) - - clang_tablegen(Diagnostic${component}CompatIDs.inc --gen-clang-diags-compat-ids -clang-component=${component} -SOURCE Diagnostic.td -TARGET ClangDiagnostic${component}CompatIDs) endmacro(clang_diag_gen) clang_diag_gen(Analysis) @@ -36,11 +31,6 @@ clang_tablegen(DiagnosticIndexName.inc -gen-clang-diags-index-name SOURCE Diagnostic.td TARGET ClangDiagnosticIndexName) -clang_tablegen(DiagnosticAllCompatIDs.inc - -gen-clang-diags-compat-ids - SOURCE Diagnostic.td - TARGET ClangDiagnosticAllCompatIDs) - clang_tablegen(AttrList.inc -gen-clang-attr-list -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE Attr.td diff --git a/clang/include/clang/Basic/Diagnostic.td b/clang/include/clang/Basic/Diagnostic.td index 65b19f3feea4f..b31d846210a8e 100644 --- a/clang/include/clang/Basic/Diagnostic.td +++ b/clang/include/clang/Basic/Diagnostic.td @@ -155,19 +155,6 @@ class DefaultWarnNoWerror { } class DefaultRemark { Severity DefaultSeverity = SEV_Remark; } -class CompatWarningId { - string Component = ?; - string Name = name; - string Diag = diag; - string DiagPre = diag_pre; - int Std = std; - - // This is unused, but Tablegen will complain if it's missing because we define - // the compatibility ids in the same place as the other diagnostics (which means - // that we'll be inside a 'let CategoryName = "" in { ... }' block). - string CategoryName = ?; -} - // C++ compatibility warnings. 
multiclass CXXCompat< string message, @@ -191,11 +178,6 @@ multiclass CXXCompat< "CXX98Compat", "CXXPre"#std_ver#"Compat"))>, DefaultIgnore; - -def : CompatWarningId< -NAME, std_ver, -"compat_cxx"#std_ver#"_"#NAME, -"compat_pre_cxx"#std_ver#"_"#NAME>; } // These generate pairs of C++ compatibility warnings of the form: diff --git a/clang/include/clang/Basic/DiagnosticAST.h b/clang/include/clang/Basic/DiagnosticAST.h index 41e2598f7cc3b..4f82114b7406b 100644 --- a/clang/include/clang/Basic/DiagnosticAST.h +++ b/clang/include/clang/Basic/DiagnosticAST.h @@ -36,18 +36,6 @@ enum { #undef DIAG_ENUM_ITEM #undef DIAG_ENUM } // end namespace diag - -namespace diag_compat { -#define DIAG_COMPAT_IDS_BEGIN() enum { -#define DIAG_COMPAT_IDS_END() \ - } \ - ; -#define DIAG_COMPAT_ID(IDX, NAME, ...) NAME = IDX, -#include "clang/Basic/DiagnosticASTCompatIDs.inc" -#undef DIAG_COMPAT_ID -#undef DIAG_COMPAT_IDS_BEGIN -#undef DIAG_COMPAT_IDS_END -}
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
redstar wrote: I refactored the code in the suggested way. Some notes: - The section to emit the code/data is either the ED or the PR. The other sections are linked via the `Parent` pointer. `MCGOFFStreamer::changeSection()` makes sure that all sections are registered in the correct order. - I use the ordinal number +1 of the section as the section number, but still need a counter for the symbols. - I still need to check that the LD symbols are emitted in the correct order. - The textual output is currently not correct. https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Fix deployment targets that were incorrectly bumped (#134278) (PR #134435)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/134435 Backport a97f73405f8e074263a0ed2dd2b8c87c014f46d9 Requested by: @ldionne >From 98cff83467d2e63d6805cb4d138a785865a45e34 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 4 Apr 2025 14:56:26 -0400 Subject: [PATCH] [libc++] Fix deployment targets that were incorrectly bumped (#134278) When I introduced the various `_LIBCPP_INTRODUCED_IN_LLVM_XY_ATTRIBUTE` macros in 182f5e9b2f03, I tried to correlate them to the right OS versions, but it seems that I made a few mistakes. This wasn't caught in the CI because we don't test back-deployment that far. rdar://148405946 (cherry picked from commit a97f73405f8e074263a0ed2dd2b8c87c014f46d9) --- libcxx/include/__configuration/availability.h | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h index 261cf9c1ae9d8..f9e52a690c05c 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -163,10 +163,10 @@ __attribute__((availability(driverkit, strict, introduced = 23.0))) // LLVM 15 -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130400) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160500) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160500) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90500) ||\ +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130300) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160300) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && 
__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160300) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90300) ||\ (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 70500) || \ (defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 220400) #define _LIBCPP_INTRODUCED_IN_LLVM_15 0 @@ -174,10 +174,10 @@ #define _LIBCPP_INTRODUCED_IN_LLVM_15 1 # endif # define _LIBCPP_INTRODUCED_IN_LLVM_15_ATTRIBUTE \ -__attribute__((availability(macos, strict, introduced = 13.4))) \ -__attribute__((availability(ios, strict, introduced = 16.5))) \ -__attribute__((availability(tvos, strict, introduced = 16.5))) \ -__attribute__((availability(watchos, strict, introduced = 9.5))) \ +__attribute__((availability(macos, strict, introduced = 13.3))) \ +__attribute__((availability(ios, strict, introduced = 16.3))) \ +__attribute__((availability(tvos, strict, introduced = 16.3))) \ +__attribute__((availability(watchos, strict, introduced = 9.3))) \ __attribute__((availability(bridgeos, strict, introduced = 7.5))) \ __attribute__((availability(driverkit, strict, introduced = 22.4))) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] Add a missing include for __bit_iterator (#127015) (PR #131382)
https://github.com/ldionne approved this pull request. Now that we have a better understanding that this is a workaround for an issue, how to reproduce the issue, etc, I am comfortable with cherry-picking this to LLVM 20. I don't think this is necessarily the workaround we'll want to keep forever, but I think this is safe and useful to cherry-pick. See https://github.com/llvm/llvm-project/pull/127015#issuecomment-2733570775 for more details. https://github.com/llvm/llvm-project/pull/131382 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/133672 >From 692e4de4f84281f8c2bc5f7278f8066929df3cd6 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 27 Mar 2025 10:45:26 + Subject: [PATCH] [LoopInterchange] Improve profitability check for vectorization The vectorization profitability has a process to check whether a given loop can be vectorized or not. Since the process is conservative, a loop that can be vectorized may be deemed not to be possible. This can trigger unnecessary exchanges. This patch improves the profitability decision by mitigating such misjudgments. Before this patch, we considered a loop to be vectorizable only when there are no loop carried dependencies with the IV of the loop. However, a loop carried dependency doesn't prevent vectorization if the distance is positive. This patch makes the vectorization check more accurate by allowing a loop with the positive dependency. Note that it is difficult to make a complete decision whether a loop can be vectorized or not. To achieve this, we must check the vector width and the distance of dependency. 
--- .../lib/Transforms/Scalar/LoopInterchange.cpp | 128 ++ .../profitability-vectorization-heuristic.ll | 8 +- 2 files changed, 106 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 1dccba4cfa7b8..078da53c52b52 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -80,6 +80,21 @@ enum class RuleTy { ForVectorization, }; +/// Store the information about if corresponding direction vector was negated +/// by normalization or not. This is necessary to restore the original one from +/// a row of a dependency matrix, because we only manage normalized direction +/// vectors and duplicate vectors are eliminated. So there may be both original +/// and negated vectors for a single entry (a row of dependency matrix). E.g., +/// if there are two direction vectors `[< =]` and `[> =]`, the later one will +/// be converted to the same as former one by normalization, so only `[< =]` +/// would be retained in the final result. +struct NegatedStatus { + bool Original = false; + bool Negated = false; + + bool isNonNegativeDir(char Dir) const; +}; + } // end anonymous namespace // Minimum loop depth supported. 
@@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) { } #endif -static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, - Loop *L, DependenceInfo *DI, - ScalarEvolution *SE, +static bool populateDependencyMatrix(CharMatrix &DepMatrix, + std::vector &NegStatusVec, + unsigned Level, Loop *L, + DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) { using ValueVector = SmallVector; @@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, return false; } ValueVector::iterator I, IE, J, JE; - StringSet<> Seen; + + // Manage all found direction vectors. and map it to the index of DepMatrix. + StringMap Seen; for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { for (J = I, JE = MemInstr.end(); J != JE; ++J) { @@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, assert(D->isOrdered() && "Expected an output, flow or anti dep."); // If the direction vector is negative, normalize it to // make it non-negative. -if (D->normalize(SE)) +bool Normalized = D->normalize(SE); +if (Normalized) LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n"); LLVM_DEBUG(StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; @@ -214,8 +233,17 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } // Make sure we only add unique entries to the dependency matrix. -if (Seen.insert(StringRef(Dep.data(), Dep.size())).second) +unsigned Index = DepMatrix.size(); +auto [Ite, Inserted] = +Seen.try_emplace(StringRef(Dep.data(), Dep.size()), Index); +if (Inserted) { DepMatrix.push_back(Dep); + NegStatusVec.push_back(NegatedStatus{}); +} else + Index = Ite->second; + +
[llvm-branch-commits] [llvm] [ctxprof] Use `isInSpecializedModule` as criteria for using contextual profile (PR #134468)
mtrofin wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/134468?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#134468** https://app.graphite.dev/github/pr/llvm/llvm-project/134468?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/134468?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#134340** https://app.graphite.dev/github/pr/llvm/llvm-project/134340?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/134468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] gn build: Add check-builtins target. (PR #134482)
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/134482 >From bc8dda56bcfadc6d7312b53313159a978d71f4fb Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 4 Apr 2025 22:10:52 -0700 Subject: [PATCH] Remove unnecessary code Created using spr 1.3.6-beta.1 --- .../gn/secondary/compiler-rt/test/builtins/BUILD.gn | 13 - 1 file changed, 13 deletions(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn index 20618fb8da360..87848075a804e 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn @@ -6,18 +6,6 @@ import("//llvm/utils/gn/build/toolchain/compiler.gni") import("//llvm/utils/gn/build/write_cmake_config.gni") import("//llvm/version.gni") -if (current_toolchain == host_toolchain) { - write_cmake_config("builtins_cfg") { -input = "lit.site.cfg.py.in" -output = "$target_gen_dir/lit.site.cfg.py" -values = [ - "BUILTINS_LIT_SOURCE_DIR=" + rebase_path("."), - "COMPILER_RT_BINARY_DIR=" + rebase_path("$root_gen_dir/compiler-rt"), - "LIT_SITE_CFG_IN_HEADER=## Autogenerated from $input, do not edit", -] - } -} - write_cmake_config("builtins_mode_cfg") { input = "Unit/lit.site.cfg.py.in" output = @@ -54,7 +42,6 @@ write_cmake_config("builtins_mode_cfg") { if (current_toolchain != host_toolchain) { group("builtins_toolchain") { deps = [ - ":builtins_cfg($host_toolchain)", ":builtins_mode_cfg", "//compiler-rt/include($host_toolchain)", "//compiler-rt/lib/builtins", ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] gn build: Add check-builtins target. (PR #134482)
https://github.com/pcc created https://github.com/llvm/llvm-project/pull/134482 Tested on aarch64 Linux and x86_64 Linux. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)
@@ -80,6 +80,21 @@ enum class RuleTy { ForVectorization, }; +/// Store the information about if corresponding direction vector was negated sjoerdmeijer wrote: Before I keep reading the rest of this patch, just wanted to share this first question that I had. I was initially a bit confused about this, and was wondering why we need 2 booleans and 4 states if a direction vector's negated status can only be true or false. But I now guess that the complication here is the unique entries in the dependency matrix, is that right? If that is the case, then I am wondering if it isn't easier to keep all the entries and not make them unique? Making them unique was a little optimisation that I added recently because I thought that would help, but if this is now complicating things and we need to do all sorts of gymnastics we might as well keep all entries. https://github.com/llvm/llvm-project/pull/133672 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] libcxx: In gdb test detect execute_mi with feature check instead of version check. (PR #132291)
https://github.com/pcc created https://github.com/llvm/llvm-project/pull/132291 The existing version check can lead to test failures on some distribution packages of gdb where not all components of the version number are integers, such as Fedora where gdb.VERSION can be something like "15.2-4.fc41". Fix it by replacing the version check with a feature check. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang][Backport] Demote mixed enumeration arithmetic error to a warning (PR #131853)
https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/131853 >From 11518ee84c9442934cd85ca99567a7f3fc3ba827 Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Tue, 18 Mar 2025 16:45:37 +0100 Subject: [PATCH 1/2] [Clang][Backport] Demote mixed enumeration arithmetic error to a warning (#131811) In C++, defaulted to an error. C++ removed these features but the removal negatively impacts users. Fixes #92340 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 +- clang/lib/Sema/SemaExpr.cpp | 4 ++-- clang/test/SemaCXX/cxx2c-enum-compare.cpp| 5 +++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 02292c10e6964..04e5d89dfcde1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -299,6 +299,9 @@ C++ Language Changes - The builtin type alias ``__builtin_common_type`` has been added to improve the performance of ``std::common_type``. +- In C++26 mode, arithmetic conversion errors between mixed enumeration types can + be disabled with ``-Wno-enum-enum-conversion`` (#GH92340). 
+ C++2c Feature Support ^ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec2a140e04d5b..6c93a46d8f36c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7567,9 +7567,13 @@ def warn_arith_conv_mixed_enum_types_cxx20 : Warning< "%sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2 is deprecated">, InGroup; -def err_conv_mixed_enum_types_cxx26 : Error< + +def err_conv_mixed_enum_types: Error < "invalid %sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2">; +def warn_conv_mixed_enum_types_cxx26 : Warning < + err_conv_mixed_enum_types.Summary>, + InGroup, DefaultError; def warn_arith_conv_mixed_anon_enum_types : Warning< warn_arith_conv_mixed_enum_types.Summary>, diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e253e3a17328f..eae7f1c3aa781 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1518,8 +1518,8 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS, unsigned DiagID; // In C++ 26, usual arithmetic conversions between 2 different enum types // are ill-formed. 
-if (S.getLangOpts().CPlusPlus26) - DiagID = diag::err_conv_mixed_enum_types_cxx26; +if (getLangOpts().CPlusPlus26) + DiagID = diag::warn_conv_mixed_enum_types_cxx26; else if (!L->castAs()->getDecl()->hasNameForLinkage() || !R->castAs()->getDecl()->hasNameForLinkage()) { // If either enumeration type is unnamed, it's less likely that the diff --git a/clang/test/SemaCXX/cxx2c-enum-compare.cpp b/clang/test/SemaCXX/cxx2c-enum-compare.cpp index f47278a60725e..96fbd368b1696 100644 --- a/clang/test/SemaCXX/cxx2c-enum-compare.cpp +++ b/clang/test/SemaCXX/cxx2c-enum-compare.cpp @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify -triple %itanium_abi_triple +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both,expected +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both -Wno-enum-enum-conversion enum E1 { e }; enum E2 { f }; void test() { -int b = e <= 3.7; // expected-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} +int b = e <= 3.7; // both-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} int k = f - e; // expected-error {{invalid arithmetic between different enumeration types ('E2' and 'E1')}} int x = 1 ? e : f; // expected-error {{invalid conditional expression between different enumeration types ('E1' and 'E2')}} } >From 00c10b7fcc95739fc8fc96df7b36efc3541b2c8c Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Tue, 18 Mar 2025 18:48:47 +0100 Subject: [PATCH 2/2] fix build --- clang/lib/Sema/SemaExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index eae7f1c3aa781..507a149da7b4b 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1518,7 +1518,7 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS, unsigned DiagID; // In C++ 26, usual arithmetic conversions between 2 different enum types // are ill-formed. 
-if (getLangOpts().CPlusPlus26) +if (S.getLangOpts().CPlusPlus26) DiagID = diag::warn_conv_mixed_enum_types_cxx26; else if (!L->castAs()->getDecl()->hasNameForLinkage() || !R->castAs()->getDecl()->hasNameForLinkage()) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://l
[llvm-branch-commits] [llvm] [AMDGPU] Precommit si-fold-bitmask.mir (PR #131310)
https://github.com/Pierre-vh closed https://github.com/llvm/llvm-project/pull/131310 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
@@ -304,6 +304,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } } + MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override { +switch (Inst.getOpcode()) { +case AArch64::ADR: +case AArch64::ADRP: kbeyls wrote: Maybe there could be a comment here about why `ADR` and `ADRP` produce a "safely materialized address register"? For example, "These instructions produce an address value in the destination register, based only on information in parts of the instruction encoding, i.e. based on information from read-only code memory. Therefore, the value in the register it writes is safe according to the assumed threat model" https://github.com/llvm/llvm-project/pull/132540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang-tools-extra] [libcxx] [clang] improved preservation of template keyword (PR #133610)
https://github.com/zyn0217 edited https://github.com/llvm/llvm-project/pull/133610 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/20.x: [LLDB][LoongArch] Fix build errors about NT_LOONGARCH_HW_{BREAK, WATCH} (#126020) (PR #134479)
llvmbot wrote: @llvm/pr-subscribers-lldb Author: None (llvmbot) Changes Backport 50ae1c7 Requested by: @ziyao233 --- Full diff: https://github.com/llvm/llvm-project/pull/134479.diff 1 Files Affected: - (modified) lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp (+14-3) ``diff diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp index b04018ee243fd..601dde2500948 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp @@ -27,13 +27,24 @@ // struct iovec definition #include +// LoongArch SIMD eXtension registers #ifndef NT_LOONGARCH_LSX -#define NT_LOONGARCH_LSX 0xa02 /* LoongArch SIMD eXtension registers */ +#define NT_LOONGARCH_LSX 0xa02 #endif +// LoongArch Advanced SIMD eXtension registers #ifndef NT_LOONGARCH_LASX -#define NT_LOONGARCH_LASX \ - 0xa03 /* LoongArch Advanced SIMD eXtension registers */ +#define NT_LOONGARCH_LASX 0xa03 +#endif + +// LoongArch hardware breakpoint registers +#ifndef NT_LOONGARCH_HW_BREAK +#define NT_LOONGARCH_HW_BREAK 0xa05 +#endif + +// LoongArch hardware watchpoint registers +#ifndef NT_LOONGARCH_HW_WATCH +#define NT_LOONGARCH_HW_WATCH 0xa06 #endif #define REG_CONTEXT_SIZE \ `` https://github.com/llvm/llvm-project/pull/134479 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/20.x: [LLDB][LoongArch] Fix build errors about NT_LOONGARCH_HW_{BREAK, WATCH} (#126020) (PR #134479)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/134479 Backport 50ae1c7 Requested by: @ziyao233 >From e5ea8aee19a9f3b82bed91c6c947965ed613741e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 7 Feb 2025 11:18:40 +0800 Subject: [PATCH] [LLDB][LoongArch] Fix build errors about NT_LOONGARCH_HW_{BREAK,WATCH} (#126020) On some OS distros such as LoongArch Fedora 38 mate-5 [1], there are no macro definitions NT_LOONGARCH_HW_BREAK and NT_LOONGARCH_HW_WATCH in the system header, then there exist some errors when building LLDB on LoongArch. (1) Description of Problem: ``` llvm-project/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp:529:16: error: 'NT_LOONGARCH_HW_WATCH' was not declared in this scope; did you mean 'NT_LOONGARCH_LBT'? 529 | int regset = NT_LOONGARCH_HW_WATCH; |^ |NT_LOONGARCH_LBT llvm-project/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp:543:12: error: 'NT_LOONGARCH_HW_BREAK' was not declared in this scope; did you mean 'NT_LOONGARCH_CSR'? 543 | regset = NT_LOONGARCH_HW_BREAK; |^ |NT_LOONGARCH_CSR ``` (2) Steps to Reproduce: ``` git clone https://github.com/llvm/llvm-project.git mkdir -p llvm-project/llvm/build && cd llvm-project/llvm/build cmake .. -G "Ninja" \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_RUNTIME=OFF \ -DLLVM_ENABLE_PROJECTS="clang;lldb" \ -DCMAKE_INSTALL_PREFIX=/usr/local/llvm \ -DLLVM_TARGETS_TO_BUILD="LoongArch" \ -DLLVM_HOST_TRIPLE=loongarch64-redhat-linux ninja ``` (3) Additional Info: Maybe there are no problems on the OS distros with newer glibc devel library, so this issue is related with OS distros. (4) Root Cause Analysis: This is because the related Linux kernel commit [2] was merged in 2023-02-25 and the glibc devel library has some delay with kernel, the glibc version of specified OS distros is not updated in time. 
(5) Final Solution: One way is to ask the maintainer of OS distros to update glibc devel library, but it is better to not depend on the glibc version. In order to avoid the build errors, just define NT_LOONGARCH_HW_BREAK and NT_LOONGARCH_HW_WATCH in LLDB if there are no these definitions in the system header. By the way, in order to fit within 80 columns, use C++-style comments for the new added NT_LOONGARCH_HW_BREAK and NT_LOONGARCH_HW_WATCH. While at it, for consistency, just modify the current NT_LOONGARCH_LSX and NT_LOONGARCH_LASX to C++-style comments too. [1] https://mirrors.wsyu.edu.cn/fedora/linux/development/rawhide/Everything/loongarch64/iso/livecd-fedora-mate-5.loongarch64.iso [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1a69f7a161a7 Signed-off-by: Tiezhu Yang (cherry picked from commit 50ae1c7bf40ba50aaf3132fa869eda8f06648155) --- .../NativeRegisterContextLinux_loongarch64.cpp | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp index b04018ee243fd..601dde2500948 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp @@ -27,13 +27,24 @@ // struct iovec definition #include +// LoongArch SIMD eXtension registers #ifndef NT_LOONGARCH_LSX -#define NT_LOONGARCH_LSX 0xa02 /* LoongArch SIMD eXtension registers */ +#define NT_LOONGARCH_LSX 0xa02 #endif +// LoongArch Advanced SIMD eXtension registers #ifndef NT_LOONGARCH_LASX -#define NT_LOONGARCH_LASX \ - 0xa03 /* LoongArch Advanced SIMD eXtension registers */ +#define NT_LOONGARCH_LASX 0xa03 +#endif + +// LoongArch hardware breakpoint registers +#ifndef NT_LOONGARCH_HW_BREAK +#define NT_LOONGARCH_HW_BREAK 0xa05 +#endif + +// LoongArch hardware watchpoint registers +#ifndef 
NT_LOONGARCH_HW_WATCH +#define NT_LOONGARCH_HW_WATCH 0xa06 #endif #define REG_CONTEXT_SIZE \ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/20.x: [LLDB][LoongArch] Fix build errors about NT_LOONGARCH_HW_{BREAK, WATCH} (#126020) (PR #134479)
llvmbot wrote: @SixWeining What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/134479 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/20.x: [LLDB][LoongArch] Fix build errors about NT_LOONGARCH_HW_{BREAK, WATCH} (#126020) (PR #134479)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/134479 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopInterchange] Fix the vectorizable check for a loop (PR #133667)
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/133667 >From bd84ddc9e4dc645e965b2a6dc535a3023e0d7e45 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 27 Mar 2025 09:52:16 + Subject: [PATCH] [LoopInterchange] Fix the vectorizable check for a loop In the profitability check for vectorization, the dependency matrix was not handled correctly. This can result to make a wrong decision: It may say "this loop can be vectorized" when in fact it cannot. The root cause of this is that the check process early returns when it finds '=' or 'I' in the dependency matrix. To make sure that we can actually vectorize the loop, we need to check all the rows of the matrix. This patch fixes the process of checking whether we can vectorize the loop or not. Now it won't make a wrong decision for a loop that cannot be vectorized. Related: #131130 --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 44 --- .../profitability-vectorization-heuristic.ll | 9 ++-- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index e777f950a7c5a..1dccba4cfa7b8 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1197,25 +1197,35 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() { return std::nullopt; } +/// Return true if we can vectorize the loop specified by \p LoopId. 
+static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) { + for (unsigned I = 0; I != DepMatrix.size(); I++) { +char Dir = DepMatrix[I][LoopId]; +if (Dir != 'I' && Dir != '=') + return false; + } + return true; +} + std::optional LoopInterchangeProfitability::isProfitableForVectorization( unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - for (auto &Row : DepMatrix) { -// If the inner loop is loop independent or doesn't carry any dependency -// it is not profitable to move this to outer position, since we are -// likely able to do inner loop vectorization already. -if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=') - return std::optional(false); - -// If the outer loop is not loop independent it is not profitable to move -// this to inner position, since doing so would not enable inner loop -// parallelism. -if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=') - return std::optional(false); - } - // If inner loop has dependence and outer loop is loop independent then it - // is/ profitable to interchange to enable inner loop parallelism. - // If there are no dependences, interchanging will not improve anything. - return std::optional(!DepMatrix.empty()); + // If the outer loop is not loop independent it is not profitable to move + // this to inner position, since doing so would not enable inner loop + // parallelism. + if (!canVectorize(DepMatrix, OuterLoopId)) +return false; + + // If inner loop has dependence and outer loop is loop independent then it is + // profitable to interchange to enable inner loop parallelism. + if (!canVectorize(DepMatrix, InnerLoopId)) +return true; + + // If both the inner and the outer loop can be vectorized, it is necessary to + // check the cost of each vectorized loop for profitability decision. At this + // time we do not have a cost model to estimate them, so return nullopt. + // TODO: Estimate the cost of vectorized loop when both the outer and the + // inner loop can be vectorized. 
+ return std::nullopt; } bool LoopInterchangeProfitability::isProfitable( diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll index 606117e70db86..b82dd5141a6b2 100644 --- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll +++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll @@ -15,16 +15,13 @@ ; } ; } ; -; FIXME: These loops are not exchanged at this time due to the problem of -; profitablity heuristic for vectorization. -; CHECK: --- !Missed +; CHECK: --- !Passed ; CHECK-NEXT: Pass:loop-interchange -; CHECK-NEXT: Name:InterchangeNotProfitable +; CHECK-NEXT: Name:Interchanged ; CHECK-NEXT: Function:interchange_necesasry_for_vectorization ; CHECK-NEXT: Args: -; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization. -; CHECK-NEXT: ... +; CHECK-NEXT: - String: Loop interchanged with enclosing loop. define void @interchange_necesasry_for_vectorization() { entry: br label %for.i.header ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-b
[llvm-branch-commits] [clang-tools-extra] release/20.x: [clang-tidy] Fix broken HeaderFilterRegex when read from config file (#133582) (PR #134215)
carlosgalvezp wrote: > It's ok as far as it works. Personally if we do not utilize that optional, > then probably it should be removed. It's essentially a revert. I agree that our usage of optional in the Options is not good, we should improve that on a separate patch. https://github.com/llvm/llvm-project/pull/134215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) (PR #134295)
https://github.com/durga4github approved this pull request. https://github.com/llvm/llvm-project/pull/134295 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
@@ -545,9 +545,27 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool WantCRTs = + !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); + + const char *crtbegin, *crtend; + if (WantCRTs) { +if (!Args.hasArg(options::OPT_r)) + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) { + auto RuntimeLib = TC.GetRuntimeLibType(Args); + if (RuntimeLib == ToolChain::RLT_Libgcc) { +crtbegin = "crtbegin.o"; +crtend = "crtend.o"; + } else { +assert(RuntimeLib == ToolChain::RLT_CompilerRT); +crtbegin = +TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object); +crtend = +TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object); petrhosek wrote: I think a `switch` is a better fit here: ```suggestion auto RuntimeLib = TC.GetRuntimeLibType(Args); switch (TC.GetRuntimeLibType(Args)) { case ToolChain::RLT_Libgcc: { crtbegin = "crtbegin.o"; crtend = "crtend.o"; break; } case ToolChain::RLT_CompilerRT: { crtbegin = TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object); crtend = TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object); break; } ``` https://github.com/llvm/llvm-project/pull/121830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Only fold flat offsets if they are inbounds (PR #132353)
https://github.com/ritter-x2a ready_for_review https://github.com/llvm/llvm-project/pull/132353 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)
@@ -0,0 +1,115 @@ +#include + +#define XXH_INLINE_ALL +#define XXH_NO_STDLIB +#define XXH_memcpy __builtin_memcpy +#define XXH_memset __builtin_memset +#define XXH_memcmp __builtin_memcmp +#include "../xxhash.h" + +// EmuPAC implements runtime emulation of PAC instructions. If the current +// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the +// emulation, which is effectively an implementation of PAC with an IMPDEF +// hashing scheme based on XXH128. +// +// The purpose of the emulation is to allow programs to be built to be portable +// to machines without PAC support, with some performance loss and increased +// probability of false positives (due to not being able to portably determine +// the VA size), while being functionally almost equivalent to running on a +// machine with PAC support. One example of a use case is if PAC is used in +// production as a security mitigation, but the testing environment is +// heterogeneous (i.e. some machines lack PAC support). In this case we would +// like the testing machines to be able to detect issues resulting +// from the use of PAC instructions that would affect production by running +// tests. This can be achieved by building test binaries with EmuPAC and +// production binaries with real PAC. +// +// The emulation assumes that the VA size is at most 48 bits. The architecture +// as of ARMv8.2, which was the last architecture version in which PAC was not +// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are +// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA. + +const uint64_t kMaxVASize = 48; +const uint64_t kPACMask = ((1ULL << 55) - 1) & ~((1ULL << kMaxVASize) - 1); +const uint64_t kTTBR1Mask = 1ULL << 55; + +// Determine whether PAC is supported without accessing memory. This utilizes +// the XPACLRI instruction which will copy bit 55 of x30 into at least bit 54 if +// PAC is supported and acts as a NOP if PAC is not supported. 
+static _Bool pac_supported() { + register uintptr_t x30 __asm__("x30") = 1ULL << 55; + __asm__ __volatile__("xpaclri" : "+r"(x30)); + return x30 & (1ULL << 54); +} + +// This asm snippet is used to force the creation of a frame record when +// calling the EmuPAC functions. This is important because the EmuPAC functions +// may crash if an auth failure is detected and may be unwound past using a +// frame pointer based unwinder. +#ifdef __GCC_HAVE_DWARF2_CFI_ASM +#define frame_pointer_wrap(sym) \ + "stp x29, x30, [sp, #-16]!\n" \ + ".cfi_def_cfa_offset 16\n" \ + "mov x29, sp\n" \ + ".cfi_def_cfa w29, 16\n" \ + ".cfi_offset w30, -8\n" \ + ".cfi_offset w29, -16\n" \ + "bl " #sym "\n" \ + ".cfi_def_cfa wsp, 16\n" \ + "ldp x29, x30, [sp], #16\n" \ + ".cfi_def_cfa_offset 0\n" \ + ".cfi_restore w30\n" \ + ".cfi_restore w29\n" \ + "ret" +#else +#define frame_pointer_wrap(sym) \ + "stp x29, x30, [sp, #-16]!\n" \ + "mov x29, sp\n" \ + "bl " #sym "\n" \ + "ldp x29, x30, [sp], #16\n" \ + "ret" +#endif + +uint64_t __emupac_pacda_impl(uint64_t ptr, uint64_t disc) { + if (pac_supported()) { +__asm__ __volatile__(".arch_extension pauth\npacda %0, %1" + : "+r"(ptr) + : "r"(disc)); +return ptr; + } + if (ptr & kTTBR1Mask) { +if ((ptr & kPACMask) != kPACMask) { + return ptr | kPACMask; +} + } else { +if (ptr & kPACMask) { + return ptr & ~kPACMask; +} + } + uint64_t hash = XXH3_64bits_withSeed(&ptr, 8, disc); + return (ptr & ~kPACMask) | (hash & kPACMask); +} + +__attribute__((naked)) uint64_t __emupac_pacda(uint64_t ptr, uint64_t disc) { + __asm__(frame_pointer_wrap(__emupac_pacda_impl)); +} + +uint64_t __emupac_autda_impl(uint64_t ptr, uint64_t disc) { + if (pac_supported()) { jroelofs wrote: Might be worth writing a lazy ifunc-like resolver for this so it doesn't have to be checked on every call. 
Something like: ``` typedef BOOL (*fptr_ty)(void); static BOOL resolver(void); // Should be signed with address diversity fptr_ty __emupac_autda_impl = resolver; static BOOL resolver(void) { if (pac_supported()) __emupac_autda_impl = __emupac_autda_pac; else __emupac_autda_impl = __emupac_autda_nopac; return __emupac_autda_impl(); } ``` If we had FMV support for PAC I'd suggest `__attribute__((target_version("pac")))`. cc @labrinea You might also want to consider giving `__emupac_autda` a preserves-none calling convention, so you don't have to save/restore around them. https://github.com/llvm/llvm-project/pull/133530 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 0d0bb63 - Revert "[AMDGPU][CodeGenPrepare] Narrow 64 bit math to 32 bit if profitable (…"
Author: Shoreshen Date: 2025-04-01T16:24:54+08:00 New Revision: 0d0bb63b43da23bc625139c096bc2ebe8be28fbf URL: https://github.com/llvm/llvm-project/commit/0d0bb63b43da23bc625139c096bc2ebe8be28fbf DIFF: https://github.com/llvm/llvm-project/commit/0d0bb63b43da23bc625139c096bc2ebe8be28fbf.diff LOG: Revert "[AMDGPU][CodeGenPrepare] Narrow 64 bit math to 32 bit if profitable (…" This reverts commit 145b4a39504b88a695f1f85f4d9da991bb9a2656. Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll Removed: llvm/test/CodeGen/AMDGPU/narrow_math_for_and.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index eb5c160670992..9c482aeb3ea5c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -1561,87 +1561,6 @@ void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const { llvm_unreachable("not a division"); } -Type *findSmallestLegalBits(Instruction *I, int OrigBit, int MaxBitsNeeded, -const TargetLowering *TLI, const DataLayout &DL) { - if (MaxBitsNeeded >= OrigBit) -return nullptr; - - Type *NewType = I->getType()->getWithNewBitWidth(MaxBitsNeeded); - while (OrigBit > MaxBitsNeeded) { -if (TLI->isOperationLegalOrCustom( -TLI->InstructionOpcodeToISD(I->getOpcode()), -TLI->getValueType(DL, NewType, true))) - return NewType; - -MaxBitsNeeded *= 2; -NewType = I->getType()->getWithNewBitWidth(MaxBitsNeeded); - } - return nullptr; -} - -static bool tryNarrowMathIfNoOverflow(Instruction *I, const TargetLowering *TLI, - const TargetTransformInfo &TTI, - const DataLayout &DL) { - unsigned Opc = I->getOpcode(); - Type *OldType = I->getType(); - - if (Opc != Instruction::Add && Opc != Instruction::Mul) -return false; - - unsigned OrigBit = OldType->getScalarSizeInBits(); - 
unsigned MaxBitsNeeded = OrigBit; - - switch (Opc) { - case Instruction::Add: -MaxBitsNeeded = KnownBits::add(computeKnownBits(I->getOperand(0), DL), - computeKnownBits(I->getOperand(1), DL)) -.countMaxActiveBits(); -break; - case Instruction::Mul: -MaxBitsNeeded = KnownBits::mul(computeKnownBits(I->getOperand(0), DL), - computeKnownBits(I->getOperand(1), DL)) -.countMaxActiveBits(); -break; - default: -llvm_unreachable("Unexpected opcode, only valid for Instruction::Add and " - "Instruction::Mul."); - } - - MaxBitsNeeded = std::max(bit_ceil(MaxBitsNeeded), 8); - Type *NewType = findSmallestLegalBits(I, OrigBit, MaxBitsNeeded, TLI, DL); - - if (!NewType) -return false; - - // Old cost - InstructionCost OldCost = - TTI.getArithmeticInstrCost(Opc, OldType, TTI::TCK_RecipThroughput); - // New cost of new op - InstructionCost NewCost = - TTI.getArithmeticInstrCost(Opc, NewType, TTI::TCK_RecipThroughput); - // New cost of narrowing 2 operands (use trunc) - NewCost += 2 * TTI.getCastInstrCost(Instruction::Trunc, NewType, OldType, - TTI.getCastContextHint(I), - TTI::TCK_RecipThroughput); - // New cost of zext narrowed result to original type - NewCost += - TTI.getCastInstrCost(Instruction::ZExt, OldType, NewType, - TTI.getCastContextHint(I), TTI::TCK_RecipThroughput); - if (NewCost >= OldCost) -return false; - - IRBuilder<> Builder(I); - Value *Trunc0 = Builder.CreateTrunc(I->getOperand(0), NewType); - Value *Trunc1 = Builder.CreateTrunc(I->getOperand(1), NewType); - Value *Arith = - Builder.CreateBinOp((Instruction::BinaryOps)Opc, Trunc0, Trunc1); - - Value *Zext = Builder.CreateZExt(Arith, OldType); - I->replaceAllUsesWith(Zext); - I->eraseFromParent(); - return true; -} - bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { if (foldBinOpIntoSelect(I)) return true; @@ -1726,9 +1645,6 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { } } - Changed = tryNarrowMathIfNoOverflow(&I, ST.getTargetLowering(), - 
TM.getTargetTransformInfo(F), DL); - return Changed; } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll index d7c35a8b007c6..296b817bc8f75 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll @@ -414,10 +41
[llvm-branch-commits] [mlir] [mlir][memref] Verify out-of-bounds access for `memref.subview` (PR #131876)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/131876 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` (PR #133146)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/133146 >From 5579f73a4ad3d8205608eecde962257077578685 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 26 Mar 2025 10:10:43 -0700 Subject: [PATCH] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` --- .../lib/ctx_profile/CtxInstrProfiling.cpp | 66 +-- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp index b0e63a8861d86..da291e0bbabdd 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp +++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp @@ -244,6 +244,39 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid, return Data.FlatCtx; } +// This should be called once for a Root. Allocate the first arena, set up the +// first context. +void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters, + uint32_t NumCallsites) { + __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock( + &AllContextsMutex); + // Re-check - we got here without having had taken a lock. 
+ if (Root->FirstMemBlock) +return; + const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites); + auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed)); + Root->FirstMemBlock = M; + Root->CurrentMem = M; + Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid, + NumCounters, NumCallsites); + AllContextRoots.PushBack(Root); +} + +ContextRoot *FunctionData::getOrAllocateContextRoot() { + auto *Root = CtxRoot; + if (Root) +return Root; + __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex); + Root = CtxRoot; + if (!Root) { +Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot(); +CtxRoot = Root; + } + + assert(Root); + return Root; +} + ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid, uint32_t NumCounters) { @@ -333,39 +366,6 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee, return Ret; } -// This should be called once for a Root. Allocate the first arena, set up the -// first context. -void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters, - uint32_t NumCallsites) { - __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock( - &AllContextsMutex); - // Re-check - we got here without having had taken a lock. 
- if (Root->FirstMemBlock) -return; - const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites); - auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed)); - Root->FirstMemBlock = M; - Root->CurrentMem = M; - Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid, - NumCounters, NumCallsites); - AllContextRoots.PushBack(Root); -} - -ContextRoot *FunctionData::getOrAllocateContextRoot() { - auto *Root = CtxRoot; - if (Root) -return Root; - __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex); - Root = CtxRoot; - if (!Root) { -Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot(); -CtxRoot = Root; - } - - assert(Root); - return Root; -} - ContextNode *__llvm_ctx_profile_start_context( FunctionData *FData, GUID Guid, uint32_t Counters, uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ddb09a0 - Revert "[AArch64][SVE] Use FeatureUseFixedOverScalableIfEqualCost for A510 a…"
Author: Nashe Mncube Date: 2025-04-04T14:36:21+01:00 New Revision: ddb09a0c9c764b0195ddbf3916782d0a84bb484a URL: https://github.com/llvm/llvm-project/commit/ddb09a0c9c764b0195ddbf3916782d0a84bb484a DIFF: https://github.com/llvm/llvm-project/commit/ddb09a0c9c764b0195ddbf3916782d0a84bb484a.diff LOG: Revert "[AArch64][SVE] Use FeatureUseFixedOverScalableIfEqualCost for A510 a…" This reverts commit d2bcc11067e682a0753c1068e378d66d59edff73. Added: Modified: llvm/lib/Target/AArch64/AArch64Processors.td Removed: llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index c37dd025d80aa..67d3ff685e6f1 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -723,7 +723,6 @@ def ProcessorFeatures { FeatureSB, FeaturePAuth, FeatureSSBS, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8,FeatureFullFP16, FeatureJS, FeatureLSE, - FeatureUseFixedOverScalableIfEqualCost, FeatureRAS, FeatureRCPC, FeatureRDM]; list A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, FeatureMTE, FeatureETE, FeatureSVEBitPerm, @@ -733,7 +732,6 @@ def ProcessorFeatures { FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, FeatureNEON, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureUseFixedOverScalableIfEqualCost, FeatureDotProd]; list A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM, FeatureMTE, FeatureETE, FeatureSVEBitPerm, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll deleted file mode 100644 index 19d0cc0650167..0 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll +++ /dev/null @@ -1,170 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=aarch64-none-elf -mcpu=cortex-a510 -mattr=+sve -passes=loop-vectorize -S | FileCheck %s --check-prefix=CHECK-CA510 -; RUN: opt < %s -mtriple=aarch64-none-elf -mcpu=cortex-a520 -mattr=+sve -passes=loop-vectorize -S | FileCheck %s --check-prefix=CHECK-CA520 - -define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { -; CHECK-CA510-LABEL: define void @sve_add( -; CHECK-CA510-SAME: ptr [[DST:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-CA510-NEXT: [[ENTRY:.*:]] -; CHECK-CA510-NEXT:[[B3:%.*]] = ptrtoint ptr [[B]] to i64 -; CHECK-CA510-NEXT:[[A2:%.*]] = ptrtoint ptr [[A]] to i64 -; CHECK-CA510-NEXT:[[DST1:%.*]] = ptrtoint ptr [[DST]] to i64 -; CHECK-CA510-NEXT:[[CMP9_NOT:%.*]] = icmp eq i64 [[N]], 0 -; CHECK-CA510-NEXT:br i1 [[CMP9_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] -; CHECK-CA510: [[FOR_BODY_PREHEADER]]: -; CHECK-CA510-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 -; CHECK-CA510-NEXT:br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] -; CHECK-CA510: [[VECTOR_MEMCHECK]]: -; CHECK-CA510-NEXT:[[TMP0:%.*]] = sub i64 [[DST1]], [[A2]] -; CHECK-CA510-NEXT:[[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 -; CHECK-CA510-NEXT:[[TMP1:%.*]] = sub i64 [[DST1]], [[B3]] -; CHECK-CA510-NEXT:[[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 -; CHECK-CA510-NEXT:[[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-CA510-NEXT:br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] -; CHECK-CA510: [[VECTOR_PH]]: -; CHECK-CA510-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], 8 -; CHECK-CA510-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-CA510-NEXT:br label %[[VECTOR_BODY:.*]] -; CHECK-CA510: [[VECTOR_BODY]]: -; CHECK-CA510-NEXT:[[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-CA510-NEXT:[[TMP2:%.*]] = add i64 [[INDEX]], 0 -; 
CHECK-CA510-NEXT:[[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA510-NEXT:[[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 0 -; CHECK-CA510-NEXT:[[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 -; CHECK-CA510-NEXT:[[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
[llvm-branch-commits] [libcxx] [libc++] Guard additional headers with _LIBCPP_HAS_LOCALIZATION (#131921) (PR #134406)
https://github.com/ldionne created https://github.com/llvm/llvm-project/pull/134406 There were some remaining headers that were not guarded with _LIBCPP_HAS_LOCALIZATION, leading to errors when trying to use modules on platforms that don't support localization (since all the headers get pulled in when building the 'std' module). This patch brings these headers in line with what we do for every other header that depends on localization. This patch also requires including <picolibc.h> from <__configuration/platform.h> in order to define _NEWLIB_VERSION. In the long term, we should use a better approach for doing that, such as defining a macro in the __config_site header. (cherry picked from commit 4090910a695efcba4b484e9f8ad2b564e9a4e7ed) >From 00a726b91cd732aab2867672c60b48ecae48bdb2 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 4 Apr 2025 11:48:46 -0400 Subject: [PATCH] [libc++] Guard additional headers with _LIBCPP_HAS_LOCALIZATION (#131921) There were some remaining headers that were not guarded with _LIBCPP_HAS_LOCALIZATION, leading to errors when trying to use modules on platforms that don't support localization (since all the headers get pulled in when building the 'std' module). This patch brings these headers in line with what we do for every other header that depends on localization. This patch also requires including <picolibc.h> from <__configuration/platform.h> in order to define _NEWLIB_VERSION. In the long term, we should use a better approach for doing that, such as defining a macro in the __config_site header. 
(cherry picked from commit 4090910a695efcba4b484e9f8ad2b564e9a4e7ed) --- libcxx/include/__configuration/platform.h | 7 + libcxx/include/__locale | 153 +- libcxx/include/__locale_dir/locale_base_api.h | 112 +++ libcxx/include/fstream| 55 ++-- libcxx/include/regex | 288 +- libcxx/include/strstream | 55 ++-- .../configs/armv7m-picolibc-libc++.cfg.in | 4 - .../test/libcxx/system_reserved_names.gen.py | 6 + 8 files changed, 354 insertions(+), 326 deletions(-) diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index 8d0f8f63f5213..f3c199dee172b 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -42,6 +42,13 @@ # endif #endif +// This is required in order for _NEWLIB_VERSION to be defined in places where we use it. +// TODO: We shouldn't be including arbitrarily-named headers from libc++ since this can break valid +// user code. Move code paths that need _NEWLIB_VERSION to another customization mechanism. +#if __has_include() +# include +#endif + #ifndef __BYTE_ORDER__ # error \ "Your compiler doesn't seem to define __BYTE_ORDER__, which is required by libc++ to know the endianness of your target platform" diff --git a/libcxx/include/__locale b/libcxx/include/__locale index dfe79d5e506f1..93187dc1d0d9c 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -11,6 +11,9 @@ #define _LIBCPP___LOCALE #include <__config> + +#if _LIBCPP_HAS_LOCALIZATION + #include <__locale_dir/locale_base_api.h> #include <__memory/shared_count.h> #include <__mutex/once_flag.h> @@ -24,18 +27,18 @@ #include // Some platforms require more includes than others. Keep the includes on all plaforms for now. 
-#include -#include +# include +# include -#if _LIBCPP_HAS_WIDE_CHARACTERS -# include -#else -# include <__std_mbstate_t.h> -#endif +# if _LIBCPP_HAS_WIDE_CHARACTERS +#include +# else +#include <__std_mbstate_t.h> +# endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -85,9 +88,9 @@ public: // locale operations: string name() const; bool operator==(const locale&) const; -#if _LIBCPP_STD_VER <= 17 +# if _LIBCPP_STD_VER <= 17 _LIBCPP_HIDE_FROM_ABI bool operator!=(const locale& __y) const { return !(*this == __y); } -#endif +# endif template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool operator()(const basic_string<_CharT, _Traits, _Allocator>&, const basic_string<_CharT, _Traits, _Allocator>&) const; @@ -237,9 +240,9 @@ long collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) cons } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#endif +# endif // template class collate_byname; @@ -264,7 +267,7 @@ protected: string_type do_transform(const char_type* __lo, const char_type* __hi) const override; };
[llvm-branch-commits] [libcxx] [libc++] Guard additional headers with _LIBCPP_HAS_LOCALIZATION (#131921) (PR #134406)
llvmbot wrote: @llvm/pr-subscribers-libcxx Author: Louis Dionne (ldionne) Changes There were some remaining headers that were not guarded with _LIBCPP_HAS_LOCALIZATION, leading to errors when trying to use modules on platforms that don't support localization (since all the headers get pulled in when building the 'std' module). This patch brings these headers in line with what we do for every other header that depends on localization. This patch also requires including <picolibc.h> from <__configuration/platform.h> in order to define _NEWLIB_VERSION. In the long term, we should use a better approach for doing that, such as defining a macro in the __config_site header. (cherry picked from commit 4090910a695efcba4b484e9f8ad2b564e9a4e7ed) --- Patch is 60.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134406.diff 8 Files Affected: - (modified) libcxx/include/__configuration/platform.h (+7) - (modified) libcxx/include/__locale (+79-74) - (modified) libcxx/include/__locale_dir/locale_base_api.h (+58-54) - (modified) libcxx/include/fstream (+28-27) - (modified) libcxx/include/regex (+145-143) - (modified) libcxx/include/strstream (+31-24) - (modified) libcxx/test/configs/armv7m-picolibc-libc++.cfg.in (-4) - (modified) libcxx/test/libcxx/system_reserved_names.gen.py (+6) ``diff diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index 8d0f8f63f5213..f3c199dee172b 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -42,6 +42,13 @@ # endif #endif +// This is required in order for _NEWLIB_VERSION to be defined in places where we use it. +// TODO: We shouldn't be including arbitrarily-named headers from libc++ since this can break valid +// user code. Move code paths that need _NEWLIB_VERSION to another customization mechanism. 
+#if __has_include() +# include +#endif + #ifndef __BYTE_ORDER__ # error \ "Your compiler doesn't seem to define __BYTE_ORDER__, which is required by libc++ to know the endianness of your target platform" diff --git a/libcxx/include/__locale b/libcxx/include/__locale index dfe79d5e506f1..93187dc1d0d9c 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -11,6 +11,9 @@ #define _LIBCPP___LOCALE #include <__config> + +#if _LIBCPP_HAS_LOCALIZATION + #include <__locale_dir/locale_base_api.h> #include <__memory/shared_count.h> #include <__mutex/once_flag.h> @@ -24,18 +27,18 @@ #include // Some platforms require more includes than others. Keep the includes on all plaforms for now. -#include -#include +# include +# include -#if _LIBCPP_HAS_WIDE_CHARACTERS -# include -#else -# include <__std_mbstate_t.h> -#endif +# if _LIBCPP_HAS_WIDE_CHARACTERS +#include +# else +#include <__std_mbstate_t.h> +# endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -85,9 +88,9 @@ public: // locale operations: string name() const; bool operator==(const locale&) const; -#if _LIBCPP_STD_VER <= 17 +# if _LIBCPP_STD_VER <= 17 _LIBCPP_HIDE_FROM_ABI bool operator!=(const locale& __y) const { return !(*this == __y); } -#endif +# endif template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool operator()(const basic_string<_CharT, _Traits, _Allocator>&, const basic_string<_CharT, _Traits, _Allocator>&) const; @@ -237,9 +240,9 @@ long collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) cons } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#endif +# endif // template class collate_byname; @@ -264,7 +267,7 @@ protected: string_type do_transform(const 
char_type* __lo, const char_type* __hi) const override; }; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI collate_byname : public collate { __locale::__locale_t __l_; @@ -283,7 +286,7 @@ protected: const char_type* __lo1, const char_type* __hi1, const char_type* __lo2, const char_type* __hi2) const override; string_type do_transform(const char_type* __lo, const char_type* __hi) const override; }; -#endif +# endif template bool locale::operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, @@ -296,7 +299,7 @@ bool locale::operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, class _LIBCPP_EXPORTED_FROM_ABI ctype_base { public: -#if defined(_LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE) +# if defined(_LIBCPP_PROVIDES_DEFAUL
[llvm-branch-commits] [clang] Revert "[clang] [ARM] Explicitly enable NEON for Windows/Darwin targets (#122095)" (PR #134407)
https://github.com/mstorsjo created https://github.com/llvm/llvm-project/pull/134407 This reverts commit 8fa0f0efce5fb81eb422e6d7eec74c66dafef4a3. This change broke assembling for e.g. "armv7s-apple-darwin" triples, which should enable VFPv4 by default (and did that before this change), but after this change, only NEON/VFPv3 were available. This is being fixed properly in latest git main as part of https://github.com/llvm/llvm-project/pull/130623 (possibly as a split out change), but any proper fix here seems to have too much potential surprises for an existing release branch. From 5f745aed394af68093a4f181a5334604b35d38f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 4 Apr 2025 17:53:19 +0300 Subject: [PATCH] Revert "[clang] [ARM] Explicitly enable NEON for Windows/Darwin targets (#122095)" This reverts commit 8fa0f0efce5fb81eb422e6d7eec74c66dafef4a3. This change broke assembling for e.g. "armv7s-apple-darwin" triples, which should enable VFPv4 by default (and did that before this change), but after this change, only NEON/VFPv3 were available. This is being fixed properly in latest git main as part of https://github.com/llvm/llvm-project/pull/130623 (possibly as a split out change), but any proper fix here seems to have too much potential surprises for an existing release branch. --- clang/lib/Driver/ToolChains/Arch/ARM.cpp | 8 -- clang/test/Driver/arm-mfpu.c | 6 ++--- clang/test/Preprocessor/arm-target-features.c | 27 --- 3 files changed, 2 insertions(+), 39 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 3aee540d501be..ef2d0c93b5b0b 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -658,21 +658,13 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, CPUArgFPUKind != llvm::ARM::FK_INVALID ? 
CPUArgFPUKind : ArchArgFPUKind; (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } else { -bool Generic = true; if (!ForAS) { std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); - if (CPU != "generic") -Generic = false; llvm::ARM::ArchKind ArchKind = arm::getLLVMArchKindForARM(CPU, ArchName, Triple); FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } -if (Generic && (Triple.isOSWindows() || Triple.isOSDarwin()) && -getARMSubArchVersionNumber(Triple) >= 7) { - FPUKind = llvm::ARM::parseFPU("neon"); - (void)llvm::ARM::getFPUFeatures(FPUKind, Features); -} } // Now we've finished accumulating features from arch, cpu and fpu, diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c index 640e1b35c84b8..a9bdcd598516a 100644 --- a/clang/test/Driver/arm-mfpu.c +++ b/clang/test/Driver/arm-mfpu.c @@ -356,10 +356,8 @@ // CHECK-HF-DAG: "-target-cpu" "arm1176jzf-s" // RUN: %clang -target armv7-apple-darwin -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// RUN: %clang -target armv7-windows -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// ASM-NEON: "-target-feature" "+neon" +// RUN: | FileCheck --check-prefix=ASM %s +// ASM-NOT: -target-feature // RUN: %clang -target armv8-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index ecf9d7eb5c19c..27eb9a322d7c2 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -132,30 +132,6 @@ // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH_EXT_IDIV__ 1 // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_FP 0xc -// RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DARWIN-V7 %s -// 
CHECK-DARWIN-V7: #define __ARMEL__ 1 -// CHECK-DARWIN-V7: #define __ARM_ARCH 7 -// CHECK-DARWIN-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-DARWIN-V7: #define __ARM_FP 0xc -// CHECK-DARWIN-V7: #define __ARM_NEON 1 -// CHECK-DARWIN-V7: #define __ARM_NEON_FP 0x4 -// CHECK-DARWIN-V7: #define __ARM_NEON__ 1 - -// RUN: %clang -target armv7-windows -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-WINDOWS-V7 %s -// CHECK-WINDOWS-V7: #define __ARMEL__ 1 -// CHECK-WINDOWS-V7: #define __ARM_ARCH 7 -// CHECK-WINDOWS-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-WINDOWS-V7: #define __ARM_FP 0xe
[llvm-branch-commits] [libcxx] [libc++] Guard additional headers with _LIBCPP_HAS_LOCALIZATION (#131921) (PR #134406)
https://github.com/ldionne milestoned https://github.com/llvm/llvm-project/pull/134406 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
@@ -545,9 +545,27 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool WantCRTs = quic-garvgupt wrote: Done https://github.com/llvm/llvm-project/pull/121830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Revert "[clang] [ARM] Explicitly enable NEON for Windows/Darwin targets (#122095)" (PR #134407)
https://github.com/mstorsjo milestoned https://github.com/llvm/llvm-project/pull/134407 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Revert "[clang] [ARM] Explicitly enable NEON for Windows/Darwin targets (#122095)" (PR #134407)
llvmbot wrote: @llvm/pr-subscribers-backend-arm Author: Martin Storsjö (mstorsjo) Changes This reverts commit 8fa0f0efce5fb81eb422e6d7eec74c66dafef4a3. This change broke assembling for e.g. "armv7s-apple-darwin" triples, which should enable VFPv4 by default (and did that before this change), but after this change, only NEON/VFPv3 were available. This is being fixed properly in latest git main as part of https://github.com/llvm/llvm-project/pull/130623 (possibly as a split out change), but any proper fix here seems to have too many potential surprises for an existing release branch. --- Full diff: https://github.com/llvm/llvm-project/pull/134407.diff 3 Files Affected: - (modified) clang/lib/Driver/ToolChains/Arch/ARM.cpp (-8) - (modified) clang/test/Driver/arm-mfpu.c (+2-4) - (modified) clang/test/Preprocessor/arm-target-features.c (-27) ``diff diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 3aee540d501be..ef2d0c93b5b0b 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -658,21 +658,13 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, CPUArgFPUKind != llvm::ARM::FK_INVALID ? 
CPUArgFPUKind : ArchArgFPUKind; (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } else { -bool Generic = true; if (!ForAS) { std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); - if (CPU != "generic") -Generic = false; llvm::ARM::ArchKind ArchKind = arm::getLLVMArchKindForARM(CPU, ArchName, Triple); FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } -if (Generic && (Triple.isOSWindows() || Triple.isOSDarwin()) && -getARMSubArchVersionNumber(Triple) >= 7) { - FPUKind = llvm::ARM::parseFPU("neon"); - (void)llvm::ARM::getFPUFeatures(FPUKind, Features); -} } // Now we've finished accumulating features from arch, cpu and fpu, diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c index 640e1b35c84b8..a9bdcd598516a 100644 --- a/clang/test/Driver/arm-mfpu.c +++ b/clang/test/Driver/arm-mfpu.c @@ -356,10 +356,8 @@ // CHECK-HF-DAG: "-target-cpu" "arm1176jzf-s" // RUN: %clang -target armv7-apple-darwin -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// RUN: %clang -target armv7-windows -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// ASM-NEON: "-target-feature" "+neon" +// RUN: | FileCheck --check-prefix=ASM %s +// ASM-NOT: -target-feature // RUN: %clang -target armv8-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index ecf9d7eb5c19c..27eb9a322d7c2 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -132,30 +132,6 @@ // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH_EXT_IDIV__ 1 // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_FP 0xc -// RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DARWIN-V7 %s -// 
CHECK-DARWIN-V7: #define __ARMEL__ 1 -// CHECK-DARWIN-V7: #define __ARM_ARCH 7 -// CHECK-DARWIN-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-DARWIN-V7: #define __ARM_FP 0xc -// CHECK-DARWIN-V7: #define __ARM_NEON 1 -// CHECK-DARWIN-V7: #define __ARM_NEON_FP 0x4 -// CHECK-DARWIN-V7: #define __ARM_NEON__ 1 - -// RUN: %clang -target armv7-windows -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-WINDOWS-V7 %s -// CHECK-WINDOWS-V7: #define __ARMEL__ 1 -// CHECK-WINDOWS-V7: #define __ARM_ARCH 7 -// CHECK-WINDOWS-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-WINDOWS-V7: #define __ARM_FP 0xe -// CHECK-WINDOWS-V7: #define __ARM_NEON 1 -// CHECK-WINDOWS-V7: #define __ARM_NEON_FP 0x6 -// CHECK-WINDOWS-V7: #define __ARM_NEON__ 1 - // RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7s -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7S %s // CHECK-V7S: #define __ARMEL__ 1 // CHECK-V7S: #define __ARM_ARCH 7 @@ -164,9 +140,6 @@ // CHECK-V7S-NOT: __ARM_FEATURE_NUMERIC_MAXMIN // CHECK-V7S-NOT: __ARM_FEATURE_DIRECTED_ROUNDING // CHECK-V7S: #define __ARM_FP 0xe -// CHECK-V7S: #define __ARM_NEON 1 -// CHECK-V7S: #define __ARM_NEON_FP 0x6 -// CHECK-V7S: #define __ARM_NEON__ 1 // RUN: %clang -target arm-arm-none-eabi -march=armv7-m -mfloat-abi=soft -x c
[llvm-branch-commits] [clang] Revert "[clang] [ARM] Explicitly enable NEON for Windows/Darwin targets (#122095)" (PR #134407)
llvmbot wrote: @llvm/pr-subscribers-clang-driver Author: Martin Storsjö (mstorsjo) Changes This reverts commit 8fa0f0efce5fb81eb422e6d7eec74c66dafef4a3. This change broke assembling for e.g. "armv7s-apple-darwin" triples, which should enable VFPv4 by default (and did that before this change), but after this change, only NEON/VFPv3 were available. This is being fixed properly in latest git main as part of https://github.com/llvm/llvm-project/pull/130623 (possibly as a split out change), but any proper fix here seems to have too many potential surprises for an existing release branch. --- Full diff: https://github.com/llvm/llvm-project/pull/134407.diff 3 Files Affected: - (modified) clang/lib/Driver/ToolChains/Arch/ARM.cpp (-8) - (modified) clang/test/Driver/arm-mfpu.c (+2-4) - (modified) clang/test/Preprocessor/arm-target-features.c (-27) ``diff diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 3aee540d501be..ef2d0c93b5b0b 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -658,21 +658,13 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, CPUArgFPUKind != llvm::ARM::FK_INVALID ? 
CPUArgFPUKind : ArchArgFPUKind; (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } else { -bool Generic = true; if (!ForAS) { std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); - if (CPU != "generic") -Generic = false; llvm::ARM::ArchKind ArchKind = arm::getLLVMArchKindForARM(CPU, ArchName, Triple); FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } -if (Generic && (Triple.isOSWindows() || Triple.isOSDarwin()) && -getARMSubArchVersionNumber(Triple) >= 7) { - FPUKind = llvm::ARM::parseFPU("neon"); - (void)llvm::ARM::getFPUFeatures(FPUKind, Features); -} } // Now we've finished accumulating features from arch, cpu and fpu, diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c index 640e1b35c84b8..a9bdcd598516a 100644 --- a/clang/test/Driver/arm-mfpu.c +++ b/clang/test/Driver/arm-mfpu.c @@ -356,10 +356,8 @@ // CHECK-HF-DAG: "-target-cpu" "arm1176jzf-s" // RUN: %clang -target armv7-apple-darwin -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// RUN: %clang -target armv7-windows -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// ASM-NEON: "-target-feature" "+neon" +// RUN: | FileCheck --check-prefix=ASM %s +// ASM-NOT: -target-feature // RUN: %clang -target armv8-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index ecf9d7eb5c19c..27eb9a322d7c2 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -132,30 +132,6 @@ // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH_EXT_IDIV__ 1 // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_FP 0xc -// RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DARWIN-V7 %s -// 
CHECK-DARWIN-V7: #define __ARMEL__ 1 -// CHECK-DARWIN-V7: #define __ARM_ARCH 7 -// CHECK-DARWIN-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-DARWIN-V7: #define __ARM_FP 0xc -// CHECK-DARWIN-V7: #define __ARM_NEON 1 -// CHECK-DARWIN-V7: #define __ARM_NEON_FP 0x4 -// CHECK-DARWIN-V7: #define __ARM_NEON__ 1 - -// RUN: %clang -target armv7-windows -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-WINDOWS-V7 %s -// CHECK-WINDOWS-V7: #define __ARMEL__ 1 -// CHECK-WINDOWS-V7: #define __ARM_ARCH 7 -// CHECK-WINDOWS-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-WINDOWS-V7: #define __ARM_FP 0xe -// CHECK-WINDOWS-V7: #define __ARM_NEON 1 -// CHECK-WINDOWS-V7: #define __ARM_NEON_FP 0x6 -// CHECK-WINDOWS-V7: #define __ARM_NEON__ 1 - // RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7s -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7S %s // CHECK-V7S: #define __ARMEL__ 1 // CHECK-V7S: #define __ARM_ARCH 7 @@ -164,9 +140,6 @@ // CHECK-V7S-NOT: __ARM_FEATURE_NUMERIC_MAXMIN // CHECK-V7S-NOT: __ARM_FEATURE_DIRECTED_ROUNDING // CHECK-V7S: #define __ARM_FP 0xe -// CHECK-V7S: #define __ARM_NEON 1 -// CHECK-V7S: #define __ARM_NEON_FP 0x6 -// CHECK-V7S: #define __ARM_NEON__ 1 // RUN: %clang -target arm-arm-none-eabi -march=armv7-m -mfloat-abi=soft -x c
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
@@ -545,9 +545,27 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool WantCRTs = + !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); + + const char *crtbegin, *crtend; + if (WantCRTs) { +if (!Args.hasArg(options::OPT_r)) + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) { + auto RuntimeLib = TC.GetRuntimeLibType(Args); + if (RuntimeLib == ToolChain::RLT_Libgcc) { +crtbegin = "crtbegin.o"; +crtend = "crtend.o"; + } else { +assert(RuntimeLib == ToolChain::RLT_CompilerRT); +crtbegin = +TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object); +crtend = +TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object); quic-garvgupt wrote: Done https://github.com/llvm/llvm-project/pull/121830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object (PR #121831)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121831 >From d2ec1a627542ce41c5ef7c2faa5149e8d96b4c5a Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 6 Jan 2025 10:05:08 -0800 Subject: [PATCH] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object This patch: - Adds CXXStdlib, runtimelib and unwindlib defaults for riscv target to BareMetal toolchain object. - Add riscv 32 and 64-bit emulation flags to linker job of BareMetal toolchain. - Removes call to RISCVToolChain object from llvm. This PR is last patch in the series of patches of merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I2877ac328f55a7638cc185d6034866cbd2ac4203 --- clang/lib/Driver/CMakeLists.txt | 1 - clang/lib/Driver/Driver.cpp | 10 +- clang/lib/Driver/ToolChains/BareMetal.cpp | 33 ++- clang/lib/Driver/ToolChains/BareMetal.h | 11 +- .../lib/Driver/ToolChains/RISCVToolchain.cpp | 232 -- clang/lib/Driver/ToolChains/RISCVToolchain.h | 67 - .../test/Driver/baremetal-undefined-symbols.c | 14 +- clang/test/Driver/baremetal.cpp | 44 ++-- clang/test/Driver/riscv32-toolchain-extra.c | 7 +- clang/test/Driver/riscv32-toolchain.c | 26 +- clang/test/Driver/riscv64-toolchain-extra.c | 7 +- clang/test/Driver/riscv64-toolchain.c | 20 +- 12 files changed, 91 insertions(+), 381 deletions(-) delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.cpp delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.h diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index 5bdb6614389cf..eee29af5d181a 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -74,7 +74,6 @@ add_clang_library(clangDriver ToolChains/OHOS.cpp ToolChains/OpenBSD.cpp ToolChains/PS4CPU.cpp - ToolChains/RISCVToolchain.cpp ToolChains/Solaris.cpp ToolChains/SPIRV.cpp ToolChains/SPIRVOpenMP.cpp 
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 056bfcf1b739a..82b49da928a79 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -41,7 +41,6 @@ #include "ToolChains/PPCFreeBSD.h" #include "ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" -#include "ToolChains/RISCVToolchain.h" #include "ToolChains/SPIRV.h" #include "ToolChains/SPIRVOpenMP.h" #include "ToolChains/SYCL.h" @@ -6886,16 +6885,11 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::msp430: -TC = -std::make_unique(*this, Target, Args); +TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: -if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args)) - TC = - std::make_unique(*this, Target, Args); -else - TC = std::make_unique(*this, Target, Args); +TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::ve: TC = std::make_unique(*this, Target, Args); diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index c4e813a2f0e5a..5f48245de25d5 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -336,6 +336,26 @@ BareMetal::OrderedMultilibs BareMetal::getOrderedMultilibs() const { return llvm::reverse(Default); } +ToolChain::CXXStdlibType BareMetal::GetDefaultCXXStdlibType() const { + if (getTriple().isRISCV() && GCCInstallation.isValid()) +return ToolChain::CST_Libstdcxx; + return ToolChain::CST_Libcxx; +} + +ToolChain::RuntimeLibType BareMetal::GetDefaultRuntimeLibType() const { + if (getTriple().isRISCV() && GCCInstallation.isValid()) +return ToolChain::RLT_Libgcc; + return ToolChain::RLT_CompilerRT; +} + +ToolChain::UnwindLibType +BareMetal::GetUnwindLibType(const llvm::opt::ArgList &Args) const { + if (getTriple().isRISCV()) +return ToolChain::UNW_None; + + return ToolChain::GetUnwindLibType(Args); +} + 
void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc)) @@ -534,8 +554,14 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-Bstatic"); - if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) -CmdArgs.push_back("--no-relax"); + if (Triple.isRISCV()) { +if (Args.hasArg(options::OPT_mno_relax)) + CmdArgs.push_back("--no-relax"); +CmdArgs.push_back("-m"); +CmdArgs.push_back(Arch == llvm::Triple::riscv64 ? "elf64lriscv" +
[llvm-branch-commits] [clang-tools-extra] release/20.x: [clang-tidy] Fix broken HeaderFilterRegex when read from config file (#133582) (PR #134215)
https://github.com/PiotrZSL approved this pull request. It's OK as long as it works. Personally, if we do not utilize that optional, then it should probably be removed. https://github.com/llvm/llvm-project/pull/134215 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
@@ -545,9 +545,27 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool WantCRTs = + !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); + + const char *crtbegin, *crtend; quic-garvgupt wrote: Done https://github.com/llvm/llvm-project/pull/121830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121830 >From ef7e1d493d8621a78bea93454b14ac3b4e9d5673 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 04:58:57 -0700 Subject: [PATCH] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object This patch conditionalise the addition of crt{begin,end}.o object files along with addition of -lgloss lib based on whether libc selected is newlib or llvm libc. Since there is no way a user can specify which libc it wants to link against, currently passing valid GCCInstallation to driver will select newlib otherwise it will default to llvm libc. Moreover, this patch makes gnuld the default linker for baremetal toolchain object. User need to pass `-fuse-ld=lld` explicitly to driver to select lld This is the 2nd patch in the series of patches of merging RISCVToolchain into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ie06dc976c306cf04ec2733bbb2d271c57d201f86 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 38 - clang/lib/Driver/ToolChains/BareMetal.h | 3 +- clang/test/Driver/aarch64-toolchain-extra.c | 13 ++- clang/test/Driver/aarch64-toolchain.c | 83 +++ clang/test/Driver/arm-toolchain-extra.c | 7 ++ clang/test/Driver/arm-toolchain.c | 88 - clang/test/Driver/baremetal.cpp | 3 +- clang/test/Driver/sanitizer-ld.c| 2 +- 8 files changed, 224 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index b2e62e3d254af..8343865cd4dc1 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -545,9 +545,31 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? 
"-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool NeedCRTs = + !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); + + const char *CRTBegin, *CRTEnd; + if (NeedCRTs) { +if (!Args.hasArg(options::OPT_r)) + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) { + auto RuntimeLib = TC.GetRuntimeLibType(Args); + switch (TC.GetRuntimeLibType(Args)) { + case (ToolChain::RLT_Libgcc): { +CRTBegin = "crtbegin.o"; +CRTEnd = "crtend.o"; +break; + } + case (ToolChain::RLT_CompilerRT): { +CRTBegin = +TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object); +CRTEnd = +TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object); +break; + } + } + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTBegin))); +} } Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, @@ -570,9 +592,12 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { +CmdArgs.push_back("--start-group"); AddRunTimeLibs(TC, D, CmdArgs, Args); - CmdArgs.push_back("-lc"); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) + CmdArgs.push_back("-lgloss"); +CmdArgs.push_back("--end-group"); } if (D.isUsingLTO()) { @@ -588,6 +613,11 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, addLTOOptions(TC, Args, CmdArgs, Output, *Input, D.getLTOMode() == LTOK_Thin); } + + if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) && + NeedCRTs) +CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); + if (TC.getTriple().isRISCV()) CmdArgs.push_back("-X"); diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h index 2a791e7672e5e..87f173342def2 100644 --- 
a/clang/lib/Driver/ToolChains/BareMetal.h +++ b/clang/lib/Driver/ToolChains/BareMetal.h @@ -36,6 +36,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { Tool *buildStaticLibTool() const override; public: + bool hasValidGCCInstallation() const { return GCCInstallation.isValid(); } bool isBareMetal() const override { return true; } bool isCrossCompiling() const override { return true; } bool HasNativeLLVMSupport() const override { return true; } @@ -60,8 +61,6 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { return ToolChain::CST_Libcxx; } - const char *getDefaultLinker() const override { return "ld.lld"; } - void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
[llvm-branch-commits] [llvm] Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (PR #134408)
llvmbot wrote: @llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-backend-aarch64 Author: Sander de Smalen (sdesmalen-arm) Changes I had to previously revert #123632 due to failures on X86 and it took me a while before I had the time to get back to this. This PR tries to reland the original patch, with additional fixes. The PR is structured as follows: * The `git revert`ed patch (with tests updated) * A fix to only add the implicit-def when tracking subreg-liveness of the destination register. * A fix to only add the implicit-def when the destination register is not dead. * Updated tests after latest rebase. The PR depends on #131361, which was split off as a separate PR. --- Patch is 141.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134408.diff 31 Files Affected: - (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+70-16) - (modified) llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll (+2-2) - (modified) llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll (+5-5) - (added) llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll (+51) - (added) llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir (+30) - (modified) llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir (+55-3) - (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+3-4) - (modified) llvm/test/CodeGen/AMDGPU/fptosi.f16.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/fptoui.f16.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll (+11-12) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll (+11-12) - (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+6-10) - (modified) llvm/test/CodeGen/AMDGPU/select.f16.ll (+53-55) - (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+6-6) - (modified) llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll (+4) - (modified) llvm/test/CodeGen/PowerPC/build-vector-tests.ll (+48) - (modified) 
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll (+6) - (modified) llvm/test/CodeGen/PowerPC/combine-fneg.ll (+1) - (modified) llvm/test/CodeGen/PowerPC/fp-strict-round.ll (+6) - (modified) llvm/test/CodeGen/PowerPC/frem.ll (+3) - (modified) llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll (+1) - (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+2) - (modified) llvm/test/CodeGen/PowerPC/llvm.modf.ll (+1) - (modified) llvm/test/CodeGen/PowerPC/vec_insert_elt.ll (+4) - (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+176) - (added) llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll (+185) - (added) llvm/test/CodeGen/X86/coalescer-subreg-to-reg-implicit-def-regression.mir (+62) - (added) llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir (+47) - (added) llvm/test/CodeGen/X86/pr76416.ll (+79) - (modified) llvm/test/CodeGen/X86/subreg-fail.mir (+2-2) - (added) llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir (+372) ``diff diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index dbd354f2ca2c4..963f5620d8dba 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -306,7 +306,11 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG + /// SrcReg. This introduces an implicit-def of DstReg on coalesced users. + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, + bool IsSubregToReg); /// If the given machine operand reads only undefined lanes add an undef /// flag. 
@@ -1444,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. + LaneBitmask NewMIImplicitOpsMask; SmallVector ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1458,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); + if (MO.isDef() && MO.getReg().isVirtual() && + MRI->shouldTrackSubRegLiveness(DstReg)) +NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1500,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
[llvm-branch-commits] [llvm] Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (PR #134408)
@@ -329,11 +329,10 @@ define <2 x half> @chain_hi_to_lo_global() { ; GFX11-TRUE16: ; %bb.0: ; %bb ; GFX11-TRUE16-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT:v_mov_b32_e32 v0, 2 -; GFX11-TRUE16-NEXT:v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT:v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 +; GFX11-TRUE16-NEXT:v_mov_b32_e32 v3, 0 ; GFX11-TRUE16-NEXT:global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT:v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT:v_mov_b32_e32 v2, 0 -; GFX11-TRUE16-NEXT:global_load_d16_hi_b16 v0, v[1:2], off +; GFX11-TRUE16-NEXT:global_load_d16_hi_b16 v0, v[2:3], off sdesmalen-arm wrote: @broxigarchen I noticed these tests changed, but I couldn't really tell whether these changes are functionally equivalent. https://github.com/llvm/llvm-project/pull/134408 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] e789e78 - Revert "[LLDB] Expose checking if the symbol file exists/is loaded via SBModu…"
Author: Jacob Lalonde Date: 2025-04-03T20:45:32-07:00 New Revision: e789e78d20c0dfe70d4e977b259460f1c8008515 URL: https://github.com/llvm/llvm-project/commit/e789e78d20c0dfe70d4e977b259460f1c8008515 DIFF: https://github.com/llvm/llvm-project/commit/e789e78d20c0dfe70d4e977b259460f1c8008515.diff LOG: Revert "[LLDB] Expose checking if the symbol file exists/is loaded via SBModu…" This reverts commit b8d8405238387ddd92450d6a3ad84350254e76a3. Added: Modified: lldb/include/lldb/API/SBModule.h lldb/source/API/SBModule.cpp Removed: diff --git a/lldb/include/lldb/API/SBModule.h b/lldb/include/lldb/API/SBModule.h index 651455bdb78d2..85332066ee687 100644 --- a/lldb/include/lldb/API/SBModule.h +++ b/lldb/include/lldb/API/SBModule.h @@ -290,9 +290,6 @@ class LLDB_API SBModule { lldb::SBAddress GetObjectFileHeaderAddress() const; lldb::SBAddress GetObjectFileEntryPointAddress() const; - /// Get if the symbol file for this module is loaded. - bool IsDebugInfoLoaded() const; - /// Get the number of global modules. static uint32_t GetNumberAllocatedModules(); diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp index 4978a553f57c7..985107ec68efd 100644 --- a/lldb/source/API/SBModule.cpp +++ b/lldb/source/API/SBModule.cpp @@ -659,18 +659,6 @@ lldb::SBAddress SBModule::GetObjectFileEntryPointAddress() const { return sb_addr; } -bool SBModule::IsDebugInfoLoaded() const { - LLDB_INSTRUMENT_VA(this); - - ModuleSP module_sp(GetSP()); - if (module_sp) { -SymbolFile *sym_file = module_sp->GetSymbolFile(/*create=*/false); -return sym_file && sym_file->GetLoadDebugInfoEnabled(); - } - - return false; -} - uint32_t SBModule::GetNumberAllocatedModules() { LLDB_INSTRUMENT(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [X86] When expanding LCMPXCHG16B_SAVE_RBX, substitute RBX in base (#134109) (PR #134331)
phoebewang wrote: > @phoebewang What do you think about merging this PR to the release branch? I think it's ok to merge. https://github.com/llvm/llvm-project/pull/134331 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
uweigand wrote: > I think that splitting the SD/ED/LD into 3 "section"s implies that a > MCSectionGOFF has a fundamentally different semantic than the other > MCSectionXXX. This is something I would like to avoid. On the other hand, the > SD/ED pair is almost the same as an ELF section, so just putting those 2 into > a MCSectionGOFF instance and handling the LD/PR symbol differently makes > sense. Thinking a bit more about this, it looks to me that we should treat SD/ED/PR on the one hand differently from LD (and ER) on the other. The former identify a *range* of address space and may hold contents of those ranges in the form of text records; the latter identify a single address (and hold no content of their own). From that perspective, the former correspond to the "section" concept, while the latter correspond to the "symbol" concept. Now, among the section types SD/ED/PR, GOFF is a bit special in that those are nested - this is somewhat similar to the subsection concept, but it is explicit in the object file format (as opposed to, say, ELF subsections). It seems to me that modelling that nested section concept explicitly by creating a separate MCSectionGOFF for each of SD, ED, and PR, and linking them as appropriate via a `Parent` pointer (which we actually already have today!), doesn't look too fundamentally different ... As long as we ensure that text emission happens into the right section (ED or PR as appropriate), this should work fine with common code. In fact, considering that at some point we want to be able to implement a general HLASM AsmParser, which would require handling any allowed combination of CSECT with multiple CATTR, we should *not* merge SD and ED into a single section. (Also, by having them separately, we no longer need special treatment of the "root" SD in the writer.) 
Finally, having separate MCSection structures for each ESD record may allow using the MCSection::Ordinal field as the ESD ID, which matches its purpose for other object file formats, and which would allow easy resolution of parent (and ADA) section pointers to ESD IDs in the writer. The LD record, on the other hand, clearly should *not* get a MCSectionGOFF. Rather, it would make sense for this to be represented as a MCSymbolGOFF. Specifically, this symbol really represents the implicit section start symbol (which ELF also has!); so it should probably best be emitted not from the section table but from the symbol table. (MCSection already has a `Begin` symbol - it should be possible to use this for that purpose.) That would also unify emission of that type of LD record with the other LD records for "normal" symbols. Attributes associated with the LD record should likewise come from the MCSymbolGOFF. This would include the ADA section, which means that association no longer needs to be hard-coded in the writer, but can instead be set up by codegen as appropriate when defining symbols. (E.g. this would also allow handling arbitrary user-provided XATTR PSECT attributes in an HLASM AsmParser.) https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
llvmbot wrote: @llvm/pr-subscribers-mc Author: Kai Nacke (redstar) Changes The GOFF format uses symbol definitions to represent sections and symbols. Introducing a section can require up to 3 symbol definitions. However, most of these details are not needed by the AsmPrinter. To map from a section (a MCSectionGOFF) to the symbol definitions, a new class called MCGOFFSymbolMapper is used. The same information can also be used by the assembly output, which justifies this centralized approach. Writing the mapped symbols is then straightforward. --- Patch is 35.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133799.diff 8 Files Affected: - (modified) llvm/include/llvm/BinaryFormat/GOFF.h (+85) - (added) llvm/include/llvm/MC/MCGOFFSymbolMapper.h (+148) - (modified) llvm/lib/MC/CMakeLists.txt (+1) - (modified) llvm/lib/MC/GOFFObjectWriter.cpp (+253-37) - (added) llvm/lib/MC/MCGOFFSymbolMapper.cpp (+203) - (modified) llvm/lib/MC/MCObjectFileInfo.cpp (+1-1) - (modified) llvm/test/CodeGen/SystemZ/zos-ppa2.ll (+1-1) - (added) llvm/test/MC/GOFF/section.ll (+73) ``diff diff --git a/llvm/include/llvm/BinaryFormat/GOFF.h b/llvm/include/llvm/BinaryFormat/GOFF.h index 443bcfc9479a8..43d80e0c247e9 100644 --- a/llvm/include/llvm/BinaryFormat/GOFF.h +++ b/llvm/include/llvm/BinaryFormat/GOFF.h @@ -169,6 +169,91 @@ enum SubsectionKind : uint8_t { SK_PPA1 = 2, SK_PPA2 = 4, }; + +// The standard System/390 convention is to name the high-order (leftmost) bit +// in a byte as bit zero. The Flags type helps to set bits in byte according +// to this numeration order. 
+class Flags { + uint8_t Val; + + constexpr static uint8_t bits(uint8_t BitIndex, uint8_t Length, uint8_t Value, +uint8_t OldValue) { +uint8_t Pos = 8 - BitIndex - Length; +uint8_t Mask = ((1 << Length) - 1) << Pos; +Value = Value << Pos; +return (OldValue & ~Mask) | Value; + } + +public: + constexpr Flags() : Val(0) {} + constexpr Flags(uint8_t BitIndex, uint8_t Length, uint8_t Value) + : Val(bits(BitIndex, Length, Value, 0)) {} + + template + constexpr void set(uint8_t BitIndex, uint8_t Length, T NewValue) { +Val = bits(BitIndex, Length, static_cast(NewValue), Val); + } + + template + constexpr T get(uint8_t BitIndex, uint8_t Length) const { +return static_cast((Val >> (8 - BitIndex - Length)) & + ((1 << Length) - 1)); + } + + constexpr operator uint8_t() const { return Val; } +}; + +// Structure for the flag field of a symbol. See +// https://www.ibm.com/docs/en/zos/3.1.0?topic=formats-external-symbol-definition-record, +// offset 41, for the definition. +struct SymbolFlags { + Flags SymFlags; + +#define GOFF_SYMBOL_FLAG(NAME, TYPE, BITINDEX, LENGTH) \ + void set##NAME(TYPE Val) { SymFlags.set(BITINDEX, LENGTH, Val); } \ + TYPE get##NAME() const { return SymFlags.get(BITINDEX, LENGTH); } + + GOFF_SYMBOL_FLAG(FillBytePresence, bool, 0, 1) + GOFF_SYMBOL_FLAG(Mangled, bool, 1, 1) + GOFF_SYMBOL_FLAG(Renameable, bool, 2, 1) + GOFF_SYMBOL_FLAG(RemovableClass, bool, 3, 1) + GOFF_SYMBOL_FLAG(ReservedQwords, ESDReserveQwords, 5, 3) + +#undef GOFF_SYMBOL_FLAG + +constexpr operator uint8_t() const { return static_cast(SymFlags); } +}; + +// Structure for the behavioral attributes. See +// https://www.ibm.com/docs/en/zos/3.1.0?topic=record-external-symbol-definition-behavioral-attributes +// for the definition. 
+struct BehavioralAttributes { + Flags Attr[10]; + +#define GOFF_BEHAVIORAL_ATTRIBUTE(NAME, TYPE, ATTRIDX, BITINDEX, LENGTH) \ + void set##NAME(TYPE Val) { Attr[ATTRIDX].set(BITINDEX, LENGTH, Val); } \ + TYPE get##NAME() const { return Attr[ATTRIDX].get(BITINDEX, LENGTH); } + + GOFF_BEHAVIORAL_ATTRIBUTE(Amode, GOFF::ESDAmode, 0, 0, 8) + GOFF_BEHAVIORAL_ATTRIBUTE(Rmode, GOFF::ESDRmode, 1, 0, 8) + GOFF_BEHAVIORAL_ATTRIBUTE(TextStyle, GOFF::ESDTextStyle, 2, 0, 4) + GOFF_BEHAVIORAL_ATTRIBUTE(BindingAlgorithm, GOFF::ESDBindingAlgorithm, 2, 4, +4) + GOFF_BEHAVIORAL_ATTRIBUTE(TaskingBehavior, GOFF::ESDTaskingBehavior, 3, 0, 3) + GOFF_BEHAVIORAL_ATTRIBUTE(ReadOnly, bool, 3, 4, 1) + GOFF_BEHAVIORAL_ATTRIBUTE(Executable, GOFF::ESDExecutable, 3, 5, 3) + GOFF_BEHAVIORAL_ATTRIBUTE(DuplicateSymbolSeverity, +GOFF::ESDDuplicateSymbolSeverity, 4, 2, 2) + GOFF_BEHAVIORAL_ATTRIBUTE(BindingStrength, GOFF::ESDBindingStrength, 4, 4, 4) + GOFF_BEHAVIORAL_ATTRIBUTE(LoadingBehavior, GOFF::ESDLoadingBehavior, 5, 0, 2) + GOFF_BEHAVIORAL_ATTRIBUTE(COMMON, bool, 5, 2, 1) + GOFF_BEHAVIORAL_ATTRIBUTE(IndirectReference, bool, 5, 3, 1) + GOFF_BEHAVIORAL_ATTRIBUTE(BindingScope, GOFF::ESDBindingScope, 5, 4, 4) + GOFF_BEHAVIORAL_ATTRIBUTE(LinkageType, GOFF::ESDLinkageType, 6, 2, 1) + GOFF_BEHAVIORAL_ATTRIBUTE(Alignment, GOFF::ESDAlignment, 6, 3, 5) + +#undef GOFF_BEHAVIORAL_ATTRIBUTE +}; } /
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: analyze functions without CFG information (PR #133461)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/133461 >From 5a549c4cdc7d452e0579608f51adeb67eff5efe7 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Wed, 19 Mar 2025 18:58:32 +0300 Subject: [PATCH 1/4] [BOLT] Gadget scanner: analyze functions without CFG information Support simple analysis of the functions for which BOLT is unable to reconstruct the CFG. This patch is inspired by the approach implemented by Kristof Beyls in the original prototype of gadget scanner, but a CFG-unaware counterpart of the data-flow analysis is implemented instead of separate version of gadget detector, as multiple gadget kinds are detected now. --- bolt/include/bolt/Core/BinaryFunction.h | 13 + bolt/include/bolt/Passes/PAuthGadgetScanner.h | 24 + bolt/lib/Passes/PAuthGadgetScanner.cpp| 266 +--- .../AArch64/gs-pacret-autiasp.s | 15 + .../binary-analysis/AArch64/gs-pauth-calls.s | 594 ++ 5 files changed, 835 insertions(+), 77 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index d3d11f8c5fb73..5cb2cc95af695 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -799,6 +799,19 @@ class BinaryFunction { return iterator_range(cie_begin(), cie_end()); } + /// Iterate over instructions (only if CFG is unavailable or not built yet). + iterator_range instrs() { +assert(!hasCFG() && "Iterate over basic blocks instead"); +return make_range(Instructions.begin(), Instructions.end()); + } + iterator_range instrs() const { +assert(!hasCFG() && "Iterate over basic blocks instead"); +return make_range(Instructions.begin(), Instructions.end()); + } + + /// Returns whether there are any labels at Offset. + bool hasLabelAt(unsigned Offset) const { return Labels.count(Offset) != 0; } + /// Iterate over all jump tables associated with this function. 
iterator_range::const_iterator> jumpTables() const { diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h index 622e6721dea55..aa44f8c565639 100644 --- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h +++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h @@ -67,6 +67,14 @@ struct MCInstInBFReference { uint64_t Offset; MCInstInBFReference(BinaryFunction *BF, uint64_t Offset) : BF(BF), Offset(Offset) {} + + static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) { +for (auto &I : BF.instrs()) + if (Inst == &I.second) +return MCInstInBFReference(&BF, I.first); +return {}; + } + MCInstInBFReference() : BF(nullptr), Offset(0) {} bool operator==(const MCInstInBFReference &RHS) const { return BF == RHS.BF && Offset == RHS.Offset; @@ -106,6 +114,12 @@ struct MCInstReference { MCInstReference(BinaryFunction *BF, uint32_t Offset) : MCInstReference(MCInstInBFReference(BF, Offset)) {} + static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) { +if (BF.hasCFG()) + return MCInstInBBReference::get(Inst, BF); +return MCInstInBFReference::get(Inst, BF); + } + bool operator<(const MCInstReference &RHS) const { if (ParentKind != RHS.ParentKind) return ParentKind < RHS.ParentKind; @@ -140,6 +154,16 @@ struct MCInstReference { llvm_unreachable(""); } + operator bool() const { +switch (ParentKind) { +case BasicBlockParent: + return U.BBRef.BB != nullptr; +case FunctionParent: + return U.BFRef.BF != nullptr; +} +llvm_unreachable(""); + } + uint64_t getAddress() const { switch (ParentKind) { case BasicBlockParent: diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index df9e87bd4e999..f5d224675d749 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -124,6 +124,27 @@ class TrackedRegisters { } }; +// Without CFG, we reset gadget scanning state when encountering an +// unconditional branch. 
Note that BC.MIB->isUnconditionalBranch neither +// considers indirect branches nor annotated tail calls as unconditional. +static bool isStateTrackingBoundary(const BinaryContext &BC, +const MCInst &Inst) { + const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); + // Adapted from llvm::MCInstrDesc::isUnconditionalBranch(). + return Desc.isBranch() && Desc.isBarrier(); +} + +template static void iterateOverInstrs(BinaryFunction &BF, T Fn) { + if (BF.hasCFG()) { +for (BinaryBasicBlock &BB : BF) + for (int64_t I = 0, E = BB.size(); I < E; ++I) +Fn(MCInstInBBReference(&BB, I)); + } else { +for (auto I : BF.instrs()) + Fn(MCInstInBFReference(&BF, I.first)); + } +} + // The security property that is checked is: // When a register is used as the address to jump to in a return instruction, // that register mus
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: analyze functions without CFG information (PR #133461)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/133461 >From 5a549c4cdc7d452e0579608f51adeb67eff5efe7 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Wed, 19 Mar 2025 18:58:32 +0300 Subject: [PATCH 1/4] [BOLT] Gadget scanner: analyze functions without CFG information Support simple analysis of the functions for which BOLT is unable to reconstruct the CFG. This patch is inspired by the approach implemented by Kristof Beyls in the original prototype of gadget scanner, but a CFG-unaware counterpart of the data-flow analysis is implemented instead of separate version of gadget detector, as multiple gadget kinds are detected now. --- bolt/include/bolt/Core/BinaryFunction.h | 13 + bolt/include/bolt/Passes/PAuthGadgetScanner.h | 24 + bolt/lib/Passes/PAuthGadgetScanner.cpp| 266 +--- .../AArch64/gs-pacret-autiasp.s | 15 + .../binary-analysis/AArch64/gs-pauth-calls.s | 594 ++ 5 files changed, 835 insertions(+), 77 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index d3d11f8c5fb73..5cb2cc95af695 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -799,6 +799,19 @@ class BinaryFunction { return iterator_range(cie_begin(), cie_end()); } + /// Iterate over instructions (only if CFG is unavailable or not built yet). + iterator_range instrs() { +assert(!hasCFG() && "Iterate over basic blocks instead"); +return make_range(Instructions.begin(), Instructions.end()); + } + iterator_range instrs() const { +assert(!hasCFG() && "Iterate over basic blocks instead"); +return make_range(Instructions.begin(), Instructions.end()); + } + + /// Returns whether there are any labels at Offset. + bool hasLabelAt(unsigned Offset) const { return Labels.count(Offset) != 0; } + /// Iterate over all jump tables associated with this function. 
iterator_range::const_iterator> jumpTables() const { diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h index 622e6721dea55..aa44f8c565639 100644 --- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h +++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h @@ -67,6 +67,14 @@ struct MCInstInBFReference { uint64_t Offset; MCInstInBFReference(BinaryFunction *BF, uint64_t Offset) : BF(BF), Offset(Offset) {} + + static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) { +for (auto &I : BF.instrs()) + if (Inst == &I.second) +return MCInstInBFReference(&BF, I.first); +return {}; + } + MCInstInBFReference() : BF(nullptr), Offset(0) {} bool operator==(const MCInstInBFReference &RHS) const { return BF == RHS.BF && Offset == RHS.Offset; @@ -106,6 +114,12 @@ struct MCInstReference { MCInstReference(BinaryFunction *BF, uint32_t Offset) : MCInstReference(MCInstInBFReference(BF, Offset)) {} + static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) { +if (BF.hasCFG()) + return MCInstInBBReference::get(Inst, BF); +return MCInstInBFReference::get(Inst, BF); + } + bool operator<(const MCInstReference &RHS) const { if (ParentKind != RHS.ParentKind) return ParentKind < RHS.ParentKind; @@ -140,6 +154,16 @@ struct MCInstReference { llvm_unreachable(""); } + operator bool() const { +switch (ParentKind) { +case BasicBlockParent: + return U.BBRef.BB != nullptr; +case FunctionParent: + return U.BFRef.BF != nullptr; +} +llvm_unreachable(""); + } + uint64_t getAddress() const { switch (ParentKind) { case BasicBlockParent: diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index df9e87bd4e999..f5d224675d749 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -124,6 +124,27 @@ class TrackedRegisters { } }; +// Without CFG, we reset gadget scanning state when encountering an +// unconditional branch. 
Note that BC.MIB->isUnconditionalBranch neither +// considers indirect branches nor annotated tail calls as unconditional. +static bool isStateTrackingBoundary(const BinaryContext &BC, +const MCInst &Inst) { + const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); + // Adapted from llvm::MCInstrDesc::isUnconditionalBranch(). + return Desc.isBranch() && Desc.isBarrier(); +} + +template static void iterateOverInstrs(BinaryFunction &BF, T Fn) { + if (BF.hasCFG()) { +for (BinaryBasicBlock &BB : BF) + for (int64_t I = 0, E = BB.size(); I < E; ++I) +Fn(MCInstInBBReference(&BB, I)); + } else { +for (auto I : BF.instrs()) + Fn(MCInstInBFReference(&BF, I.first)); + } +} + // The security property that is checked is: // When a register is used as the address to jump to in a return instruction, // that register mus
[llvm-branch-commits] [llvm] [BOLT] Make DataflowAnalysis::getStateBefore() const (NFC) (PR #133308)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/133308 >From 05852d602b5d1106067d859381fada2c780239b6 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Thu, 27 Mar 2025 21:06:55 +0300 Subject: [PATCH] [BOLT] Make DataflowAnalysis::getStateBefore() const (NFC) --- bolt/include/bolt/Passes/DataflowAnalysis.h | 11 +++ bolt/lib/Passes/PAuthGadgetScanner.cpp | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/bolt/include/bolt/Passes/DataflowAnalysis.h b/bolt/include/bolt/Passes/DataflowAnalysis.h index 2afaa6d3043a6..f6ca39cf6f860 100644 --- a/bolt/include/bolt/Passes/DataflowAnalysis.h +++ b/bolt/include/bolt/Passes/DataflowAnalysis.h @@ -292,14 +292,17 @@ class DataflowAnalysis { /// Relies on a ptr map to fetch the previous instruction and then retrieve /// state. WARNING: Watch out for invalidated pointers. Do not use this /// function if you invalidated pointers after the analysis has been completed - ErrorOr getStateBefore(const MCInst &Point) { -return getStateAt(PrevPoint[&Point]); + ErrorOr getStateBefore(const MCInst &Point) const { +auto It = PrevPoint.find(&Point); +if (It == PrevPoint.end()) + return make_error_code(std::errc::result_out_of_range); +return getStateAt(It->getSecond()); } - ErrorOr getStateBefore(ProgramPoint Point) { + ErrorOr getStateBefore(ProgramPoint Point) const { if (Point.isBB()) return getStateAt(*Point.getBB()); -return getStateAt(PrevPoint[Point.getInst()]); +return getStateBefore(*Point.getInst()); } /// Remove any state annotations left by this analysis diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index 00846247fdc21..df9e87bd4e999 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -443,7 +443,7 @@ class PacRetAnalysis public: std::vector getLastClobberingInsts(const MCInst &Inst, BinaryFunction &BF, - const ArrayRef UsedDirtyRegs) { + const ArrayRef UsedDirtyRegs) const { if 
(RegsToTrackInstsFor.empty()) return {}; auto MaybeState = getStateBefore(Inst); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -19,7 +19,7 @@ ; CHECK:.quad L#PPA2-CELQSTRT * A(PPA2-CELQSTRT) ; CHECK: L#PPA1_void_test_0: ; CHECK:.long L#PPA2-L#PPA1_void_test_0 * Offset to PPA2 -; CHECK:.section"B_IDRL" +; CHECK:.section".idrl" MaskRay wrote: Q: Is the previous "B_IDRL" wrong? https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/133090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Fix link order of BareMetal toolchain object (PR #132806)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132806 >From 9c67ffb3e6ef1a5365aed5e00f3b1b1aee82b28f Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 06:17:42 -0700 Subject: [PATCH] [Driver] Fix link order of BareMetal toolchain object The linker job in BareMetal toolchain object will be used by gnuld and lld both. However, gnuld processes the arguments in the order in which they appear on the command line, whereas there is no such restriction with lld. The previous order was: LibraryPaths -> Libraries -> LTOOptions -> LinkerInputs The new order is: LibraryPaths -> LTOOptions -> LinkerInputs -> Libraries LTO options need to be added before adding any linker inputs because file format after compile stage during LTO is bitcode which gnuld natively cannot process. Hence we will need to pass appropriate plugins before adding any bitcode file on the command line. Object files that are getting linked need to be passed before processing any libraries so that gnuld can appropriately do symbol resolution for the symbols for which no definition is provided through user code. Similar link order is also followed by other linker jobs for gnuld such as in gnutools::Linker in Gnu.cpp This is the 3rd patch in the series of patches of merging RISCVToolchain into BareMetal toolchain object. 
RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I0e68e403c08b5687cc3346e833981f7b9f3819c4 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 32 - clang/test/Driver/aarch64-toolchain-extra.c | 2 +- clang/test/Driver/aarch64-toolchain.c | 24 +++ clang/test/Driver/arm-toolchain-extra.c | 2 +- clang/test/Driver/arm-toolchain.c | 24 +++ clang/test/Driver/baremetal-multilib.yaml | 3 +- clang/test/Driver/baremetal-sysroot.cpp | 8 ++- clang/test/Driver/baremetal.cpp | 79 + 8 files changed, 98 insertions(+), 76 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 8343865cd4dc1..da864ac166736 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -529,8 +529,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); - CmdArgs.push_back("-Bstatic"); if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) @@ -580,6 +578,22 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, for (const auto &LibPath : TC.getLibraryPaths()) CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-L", LibPath))); + if (D.isUsingLTO()) { +assert(!Inputs.empty() && "Must have at least one input."); +// Find the first filename InputInfo object. +auto Input = llvm::find_if( +Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); +if (Input == Inputs.end()) + // For a very rare case, all of the inputs to the linker are + // InputArg. If that happens, just use the first InputInfo. 
+ Input = Inputs.begin(); + +addLTOOptions(TC, Args, CmdArgs, Output, *Input, + D.getLTOMode() == LTOK_Thin); + } + + AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); + if (TC.ShouldLinkCXXStdlib(Args)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); @@ -600,20 +614,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("--end-group"); } - if (D.isUsingLTO()) { -assert(!Inputs.empty() && "Must have at least one input."); -// Find the first filename InputInfo object. -auto Input = llvm::find_if( -Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); -if (Input == Inputs.end()) - // For a very rare case, all of the inputs to the linker are - // InputArg. If that happens, just use the first InputInfo. - Input = Inputs.begin(); - -addLTOOptions(TC, Args, CmdArgs, Output, *Input, - D.getLTOMode() == LTOK_Thin); - } - if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) && NeedCRTs) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c index 2a930e35acd45..a0b5f2902962f 100644 --- a/clang/test/Driver/aarch64-toolchain-extra.c +++ b/clang/test/Driver/aarch64-toolchain-extra.c @@ -31,5 +31,5 @@ // C-AARCH64-BAREMETAL-NOGCC: "{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/lib/crt0.o" // C-AARCH64-BAREMETAL-NOGCC: "{{.*}}/aarch64-nogcc/{{.*}}/aarch64-none-elf/lib/crtbegin.o" // C-AARCH64-BAREMETAL-NOGCC:
[llvm-branch-commits] [clang] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object (PR #121831)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121831 >From eaa96afd599216f042fd01fbe7f6f30a851add38 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 6 Jan 2025 10:05:08 -0800 Subject: [PATCH] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object This patch: - Adds CXXStdlib, runtimelib and unwindlib defaults for riscv target to BareMetal toolchain object. - Add riscv 32 and 64-bit emulation flags to linker job of BareMetal toolchain. - Removes call to RISCVToolChain object from llvm. This PR is last patch in the series of patches of merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I2877ac328f55a7638cc185d6034866cbd2ac4203 --- clang/lib/Driver/CMakeLists.txt | 1 - clang/lib/Driver/Driver.cpp | 10 +- clang/lib/Driver/ToolChains/BareMetal.cpp | 33 ++- clang/lib/Driver/ToolChains/BareMetal.h | 11 +- .../lib/Driver/ToolChains/RISCVToolchain.cpp | 232 -- clang/lib/Driver/ToolChains/RISCVToolchain.h | 67 - .../test/Driver/baremetal-undefined-symbols.c | 14 +- clang/test/Driver/baremetal.cpp | 44 ++-- clang/test/Driver/riscv32-toolchain-extra.c | 7 +- clang/test/Driver/riscv32-toolchain.c | 26 +- clang/test/Driver/riscv64-toolchain-extra.c | 7 +- clang/test/Driver/riscv64-toolchain.c | 20 +- 12 files changed, 91 insertions(+), 381 deletions(-) delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.cpp delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.h diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index 5bdb6614389cf..eee29af5d181a 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -74,7 +74,6 @@ add_clang_library(clangDriver ToolChains/OHOS.cpp ToolChains/OpenBSD.cpp ToolChains/PS4CPU.cpp - ToolChains/RISCVToolchain.cpp ToolChains/Solaris.cpp ToolChains/SPIRV.cpp ToolChains/SPIRVOpenMP.cpp 
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 056bfcf1b739a..82b49da928a79 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -41,7 +41,6 @@ #include "ToolChains/PPCFreeBSD.h" #include "ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" -#include "ToolChains/RISCVToolchain.h" #include "ToolChains/SPIRV.h" #include "ToolChains/SPIRVOpenMP.h" #include "ToolChains/SYCL.h" @@ -6886,16 +6885,11 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::msp430: -TC = -std::make_unique(*this, Target, Args); +TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: -if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args)) - TC = - std::make_unique(*this, Target, Args); -else - TC = std::make_unique(*this, Target, Args); +TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::ve: TC = std::make_unique(*this, Target, Args); diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index c4e813a2f0e5a..5f48245de25d5 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -336,6 +336,26 @@ BareMetal::OrderedMultilibs BareMetal::getOrderedMultilibs() const { return llvm::reverse(Default); } +ToolChain::CXXStdlibType BareMetal::GetDefaultCXXStdlibType() const { + if (getTriple().isRISCV() && GCCInstallation.isValid()) +return ToolChain::CST_Libstdcxx; + return ToolChain::CST_Libcxx; +} + +ToolChain::RuntimeLibType BareMetal::GetDefaultRuntimeLibType() const { + if (getTriple().isRISCV() && GCCInstallation.isValid()) +return ToolChain::RLT_Libgcc; + return ToolChain::RLT_CompilerRT; +} + +ToolChain::UnwindLibType +BareMetal::GetUnwindLibType(const llvm::opt::ArgList &Args) const { + if (getTriple().isRISCV()) +return ToolChain::UNW_None; + + return ToolChain::GetUnwindLibType(Args); +} + 
void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc)) @@ -534,8 +554,14 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-Bstatic"); - if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) -CmdArgs.push_back("--no-relax"); + if (Triple.isRISCV()) { +if (Args.hasArg(options::OPT_mno_relax)) + CmdArgs.push_back("--no-relax"); +CmdArgs.push_back("-m"); +CmdArgs.push_back(Arch == llvm::Triple::riscv64 ? "elf64lriscv" +
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121830 >From 05930f0e10391684df5cc24ed4a3460583a0ccd7 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 04:58:57 -0700 Subject: [PATCH] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object This patch conditionalise the addition of crt{begin,end}.o object files along with addition of -lgloss lib based on whether libc selected is newlib or llvm libc. Since there is no way a user can specify which libc it wants to link against, currently passing valid GCCInstallation to driver will select newlib otherwise it will default to llvm libc. Moreover, this patch makes gnuld the default linker for baremetal toolchain object. User need to pass `-fuse-ld=lld` explicitly to driver to select lld This is the 2nd patch in the series of patches of merging RISCVToolchain into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ie06dc976c306cf04ec2733bbb2d271c57d201f86 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 38 - clang/lib/Driver/ToolChains/BareMetal.h | 3 +- clang/test/Driver/aarch64-toolchain-extra.c | 13 ++- clang/test/Driver/aarch64-toolchain.c | 83 +++ clang/test/Driver/arm-toolchain-extra.c | 7 ++ clang/test/Driver/arm-toolchain.c | 88 - clang/test/Driver/baremetal.cpp | 3 +- clang/test/Driver/sanitizer-ld.c| 2 +- 8 files changed, 224 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index b2e62e3d254af..8343865cd4dc1 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -545,9 +545,31 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? 
"-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool NeedCRTs = + !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); + + const char *CRTBegin, *CRTEnd; + if (NeedCRTs) { +if (!Args.hasArg(options::OPT_r)) + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) { + auto RuntimeLib = TC.GetRuntimeLibType(Args); + switch (TC.GetRuntimeLibType(Args)) { + case (ToolChain::RLT_Libgcc): { +CRTBegin = "crtbegin.o"; +CRTEnd = "crtend.o"; +break; + } + case (ToolChain::RLT_CompilerRT): { +CRTBegin = +TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object); +CRTEnd = +TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object); +break; + } + } + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTBegin))); +} } Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, @@ -570,9 +592,12 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { +CmdArgs.push_back("--start-group"); AddRunTimeLibs(TC, D, CmdArgs, Args); - CmdArgs.push_back("-lc"); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) + CmdArgs.push_back("-lgloss"); +CmdArgs.push_back("--end-group"); } if (D.isUsingLTO()) { @@ -588,6 +613,11 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, addLTOOptions(TC, Args, CmdArgs, Output, *Input, D.getLTOMode() == LTOK_Thin); } + + if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) && + NeedCRTs) +CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); + if (TC.getTriple().isRISCV()) CmdArgs.push_back("-X"); diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h index 2a791e7672e5e..b4e556df111fb 100644 --- 
a/clang/lib/Driver/ToolChains/BareMetal.h +++ b/clang/lib/Driver/ToolChains/BareMetal.h @@ -36,6 +36,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { Tool *buildStaticLibTool() const override; public: + bool hasValidGCCInstallation() const {return GCCInstallation.isValid(); } bool isBareMetal() const override { return true; } bool isCrossCompiling() const override { return true; } bool HasNativeLLVMSupport() const override { return true; } @@ -60,8 +61,6 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { return ToolChain::CST_Libcxx; } - const char *getDefaultLinker() const override { return "ld.lld"; } - void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
[llvm-branch-commits] [clang] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object (PR #132808)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132808 >From 3d7bc6fda9dd5d4c69ea36f26ffebff052fd3d7b Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 07:04:59 -0700 Subject: [PATCH] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object RISCVToolChain object passes `--sysroot` option from clang to gnuld. Adding the support for the same in BareMetal toolchain object. This is done as a part of the effort to merge RISCVToolchain object into BareMetal toolchain object. This is the 5th patch in the series of patches for merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ie830bf6d126fea46dc225e5ef97e14349765ba07 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 3 + clang/test/Driver/aarch64-toolchain.c | 5 +- clang/test/Driver/arm-toolchain.c | 3 + clang/test/Driver/baremetal.cpp | 96 +-- 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 556b163eb01a0..c4e813a2f0e5a 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -529,6 +529,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); + if (!D.SysRoot.empty()) +CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); + CmdArgs.push_back("-Bstatic"); if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c index d45be9a6ee649..eaf6585909518 100644 --- a/clang/test/Driver/aarch64-toolchain.c +++ b/clang/test/Driver/aarch64-toolchain.c @@ -17,6 +17,7 @@ // C-AARCH64-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" 
// C-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// C-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // C-AARCH64-BAREMETAL: "-Bstatic" "-EL" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" @@ -53,6 +54,7 @@ // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1" // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// CXX-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL: "-Bstatic" "-EL" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" @@ -89,7 +91,8 @@ // CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1" // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" -// CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld +// CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// CXX-AARCH64-BAREMETAL-LIBCXX: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-EL" // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c index d89f77b86c23b..ac4fe8d2271fb 100644 --- a/clang/test/Driver/arm-toolchain.c +++ b/clang/test/Driver/arm-toolchain.c @@ -17,6 +17,7 @@ // C-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" // C-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" // C-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld" +// C-ARM-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_arm_gc
[llvm-branch-commits] [clang] [Driver] Fix link order of BareMetal toolchain object (PR #132806)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132806 >From 34b8356347f3a8a7dd729e547c888448f41dfb99 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 06:17:42 -0700 Subject: [PATCH] [Driver] Fix link order of BareMetal toolchain object The linker job in BareMetal toolchain object will be used by gnuld and lld both. However, gnuld processes the arguments in the order in which they appear on command line, whereas there is no such restriction with lld. The previous order was: LibraryPaths -> Libraries -> LTOOptions -> LinkerInputs The new order is: LibraryPaths -> LTOOptions -> LinkerInputs -> Libraries LTO options need to be added before adding any linker inputs because file format after compile stage during LTO is bitcode which gnuld natively cannot process. Hence we will need to pass appropriate plugins before adding any bitcode file on the command line. Object files that are getting linked need to be passed before processing any libraries so that gnuld can appropriately do symbol resolution for the symbols for which no definition is provided through user code. Similar link order is also followed by other linker jobs for gnuld such as in gnutools::Linker in Gnu.cpp This is the 3rd patch in the series of patches of merging RISCVToolchain into BareMetal toolchain object. 
RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I0e68e403c08b5687cc3346e833981f7b9f3819c4 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 32 - clang/test/Driver/aarch64-toolchain-extra.c | 2 +- clang/test/Driver/aarch64-toolchain.c | 24 +++ clang/test/Driver/arm-toolchain-extra.c | 2 +- clang/test/Driver/arm-toolchain.c | 24 +++ clang/test/Driver/baremetal-multilib.yaml | 3 +- clang/test/Driver/baremetal-sysroot.cpp | 8 ++- clang/test/Driver/baremetal.cpp | 79 + 8 files changed, 98 insertions(+), 76 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 8343865cd4dc1..da864ac166736 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -529,8 +529,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); - CmdArgs.push_back("-Bstatic"); if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) @@ -580,6 +578,22 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, for (const auto &LibPath : TC.getLibraryPaths()) CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-L", LibPath))); + if (D.isUsingLTO()) { +assert(!Inputs.empty() && "Must have at least one input."); +// Find the first filename InputInfo object. +auto Input = llvm::find_if( +Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); +if (Input == Inputs.end()) + // For a very rare case, all of the inputs to the linker are + // InputArg. If that happens, just use the first InputInfo. 
+ Input = Inputs.begin(); + +addLTOOptions(TC, Args, CmdArgs, Output, *Input, + D.getLTOMode() == LTOK_Thin); + } + + AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); + if (TC.ShouldLinkCXXStdlib(Args)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); @@ -600,20 +614,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("--end-group"); } - if (D.isUsingLTO()) { -assert(!Inputs.empty() && "Must have at least one input."); -// Find the first filename InputInfo object. -auto Input = llvm::find_if( -Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); -if (Input == Inputs.end()) - // For a very rare case, all of the inputs to the linker are - // InputArg. If that happens, just use the first InputInfo. - Input = Inputs.begin(); - -addLTOOptions(TC, Args, CmdArgs, Output, *Input, - D.getLTOMode() == LTOK_Thin); - } - if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) && NeedCRTs) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c index 2a930e35acd45..a0b5f2902962f 100644 --- a/clang/test/Driver/aarch64-toolchain-extra.c +++ b/clang/test/Driver/aarch64-toolchain-extra.c @@ -31,5 +31,5 @@ // C-AARCH64-BAREMETAL-NOGCC: "{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/lib/crt0.o" // C-AARCH64-BAREMETAL-NOGCC: "{{.*}}/aarch64-nogcc/{{.*}}/aarch64-none-elf/lib/crtbegin.o" // C-AARCH64-BAREMETAL-NOGCC:
[llvm-branch-commits] [clang] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. (PR #132807)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132807 >From b3b8c8e629ea6fca9444f01ebd51939eb4fe1673 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 06:49:09 -0700 Subject: [PATCH] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. Add support for `-u` option to force undefined symbols. This option is supported by both lld and gnuld. This is done as a part of the effort to merge RISCVToolchain object into BareMetal toolchain object. This is the 4th patch in the series of patches for merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ia6597c756923a77fd9c7cb9a6ae8e52a56f5457d --- clang/lib/Driver/ToolChains/BareMetal.cpp | 5 +++-- clang/test/Driver/baremetal-undefined-symbols.c | 15 +++ clang/test/Driver/riscv-args.c | 6 -- 3 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/baremetal-undefined-symbols.c delete mode 100644 clang/test/Driver/riscv-args.c diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index da864ac166736..556b163eb01a0 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -570,8 +570,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, -options::OPT_s, options::OPT_t, options::OPT_r}); + Args.addAllArgs(CmdArgs, + {options::OPT_L, options::OPT_u, options::OPT_T_Group, + options::OPT_s, options::OPT_t, options::OPT_r}); TC.AddFilePathLibArgs(Args, CmdArgs); diff --git a/clang/test/Driver/baremetal-undefined-symbols.c b/clang/test/Driver/baremetal-undefined-symbols.c new file mode 100644 index 0..0ce0db43bccad --- /dev/null +++ b/clang/test/Driver/baremetal-undefined-symbols.c @@ -0,0 +1,15 @@ +// Check the arguments are 
correctly passed + +// Make sure -T is the last with gcc-toolchain option +// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-LD %s +// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" + +// TODO: Merge this test with the above in the last patch when finally integrating riscv +// Make sure -T is the last with gcc-toolchain option +// RUN: %clang -### --target=aarch64-none-elf --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ARM-LD %s +// RUN: %clang -### --target=armv6m-none-eabi --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ARM-LD %s +// CHECK-ARM-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10" + diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c deleted file mode 100644 index cab08e5b0f811..0 --- a/clang/test/Driver/riscv-args.c +++ /dev/null @@ -1,6 +0,0 @@ -// Check the arguments are correctly passed - -// Make sure -T is the last with gcc-toolchain option -// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-LD %s -// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object (PR #132808)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132808 >From c74186f7a40031a04113d7cd17841152e4dd4229 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 07:04:59 -0700 Subject: [PATCH] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object RISCVToolChain object passes `--sysroot` option from clang to gnuld. Adding the support for the same in BareMetal toolchain object. This is done as a part of the effort to merge RISCVToolchain object into BareMetal toolchain object. This is the 5th patch in the series of patches for merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ie830bf6d126fea46dc225e5ef97e14349765ba07 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 3 + clang/test/Driver/aarch64-toolchain.c | 5 +- clang/test/Driver/arm-toolchain.c | 3 + clang/test/Driver/baremetal.cpp | 96 +-- 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 556b163eb01a0..c4e813a2f0e5a 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -529,6 +529,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); + if (!D.SysRoot.empty()) +CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); + CmdArgs.push_back("-Bstatic"); if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c index d45be9a6ee649..eaf6585909518 100644 --- a/clang/test/Driver/aarch64-toolchain.c +++ b/clang/test/Driver/aarch64-toolchain.c @@ -17,6 +17,7 @@ // C-AARCH64-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" 
// C-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// C-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // C-AARCH64-BAREMETAL: "-Bstatic" "-EL" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" @@ -53,6 +54,7 @@ // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1" // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// CXX-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL: "-Bstatic" "-EL" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" @@ -89,7 +91,8 @@ // CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1" // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" -// CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld +// CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// CXX-AARCH64-BAREMETAL-LIBCXX: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-EL" // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c index d89f77b86c23b..ac4fe8d2271fb 100644 --- a/clang/test/Driver/arm-toolchain.c +++ b/clang/test/Driver/arm-toolchain.c @@ -17,6 +17,7 @@ // C-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" // C-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" // C-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld" +// C-ARM-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_arm_gc
[llvm-branch-commits] [clang] release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) (PR #134295)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/134295 Backport 2d1517d Requested by: @Artem-B >From b55d12882f761ecbefe488c7357b7ff55a2bd56e Mon Sep 17 00:00:00 2001 From: Austin Schuh Date: Wed, 26 Mar 2025 13:54:58 -0700 Subject: [PATCH] cuda clang: Fix argument order for __reduce_max_sync (#132881) Fixes: https://github.com/llvm/llvm-project/issues/131415 - Signed-off-by: Austin Schuh (cherry picked from commit 2d1517d257fcbd0c9bce14badc7646e94d81ea2b) --- clang/lib/Headers/__clang_cuda_intrinsics.h | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index a04e8b6de44d0..8b230af6f6647 100644 --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) { #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 __device__ inline unsigned __reduce_add_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_add(__mask, __value); + return __nvvm_redux_sync_add(__value, __mask); } __device__ inline unsigned __reduce_min_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_umin(__mask, __value); + return __nvvm_redux_sync_umin(__value, __mask); } __device__ inline unsigned __reduce_max_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_umax(__mask, __value); + return __nvvm_redux_sync_umax(__value, __mask); } __device__ inline int __reduce_min_sync(unsigned __mask, int __value) { - return __nvvm_redux_sync_min(__mask, __value); + return __nvvm_redux_sync_min(__value, __mask); } __device__ inline int __reduce_max_sync(unsigned __mask, int __value) { - return __nvvm_redux_sync_max(__mask, __value); + return __nvvm_redux_sync_max(__value, __mask); } __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_or(__mask, 
__value); + return __nvvm_redux_sync_or(__value, __mask); } __device__ inline unsigned __reduce_and_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_and(__mask, __value); + return __nvvm_redux_sync_and(__value, __mask); } __device__ inline unsigned __reduce_xor_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_xor(__mask, __value); + return __nvvm_redux_sync_xor(__value, __mask); } __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
@@ -545,9 +545,27 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool WantCRTs = petrhosek wrote: This is just a suggestion, but I think `NeedsCRT` would be more accurate. ```suggestion bool NeedCRT = ``` https://github.com/llvm/llvm-project/pull/121830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. (PR #132807)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132807 >From 789d8f235791aa02915259ec59d679c6b02ccae9 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 06:49:09 -0700 Subject: [PATCH] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. Add support for `-u` option to force undefined symbols. This option is supported by both lld and gnuld. This is done as a part of the effort to merge RISCVToolchain object into BareMetal toolchain object. This is the 4th patch in the series of patches for merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ia6597c756923a77fd9c7cb9a6ae8e52a56f5457d --- clang/lib/Driver/ToolChains/BareMetal.cpp | 5 +++-- clang/test/Driver/baremetal-undefined-symbols.c | 15 +++ clang/test/Driver/riscv-args.c | 6 -- 3 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/baremetal-undefined-symbols.c delete mode 100644 clang/test/Driver/riscv-args.c diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index da864ac166736..556b163eb01a0 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -570,8 +570,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, -options::OPT_s, options::OPT_t, options::OPT_r}); + Args.addAllArgs(CmdArgs, + {options::OPT_L, options::OPT_u, options::OPT_T_Group, + options::OPT_s, options::OPT_t, options::OPT_r}); TC.AddFilePathLibArgs(Args, CmdArgs); diff --git a/clang/test/Driver/baremetal-undefined-symbols.c b/clang/test/Driver/baremetal-undefined-symbols.c new file mode 100644 index 0..0ce0db43bccad --- /dev/null +++ b/clang/test/Driver/baremetal-undefined-symbols.c @@ -0,0 +1,15 @@ +// Check the arguments are 
correctly passed + +// Make sure -T is the last with gcc-toolchain option +// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-LD %s +// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" + +// TODO: Merge this test with the above in the last patch when finally integrating riscv +// Make sure -T is the last with gcc-toolchain option +// RUN: %clang -### --target=aarch64-none-elf --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ARM-LD %s +// RUN: %clang -### --target=armv6m-none-eabi --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ARM-LD %s +// CHECK-ARM-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10" + diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c deleted file mode 100644 index cab08e5b0f811..0 --- a/clang/test/Driver/riscv-args.c +++ /dev/null @@ -1,6 +0,0 @@ -// Check the arguments are correctly passed - -// Make sure -T is the last with gcc-toolchain option -// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-LD %s -// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Add missing release note for LLVM 20 about zip_view (#134144) (PR #134313)
https://github.com/frederick-vs-ja approved this pull request. https://github.com/llvm/llvm-project/pull/134313 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
redstar wrote: > Thinking a bit more about this, it looks to me that we should treat SD/ED/PR > on the one hand differently from LD (and ER) on the other. The former > identify a range of address space and may hold contents of those ranges in > the form of text records; the latter identify a single address (and hold no > content of their own). Yes, that is correct. > From that perspective, the former correspond to the "section" concept, while > the latter correspond to the "symbol" concept. Now, among the section types > SD/ED/PR, GOFF is a bit special in that those are nested - this is somewhat > similar to the subsection concept, but it is explicit in the object file > format (as opposed to, say, ELF subsections). I'll try to implement this. Well, first I'll fix the failing test cases. https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
@@ -2031,17 +2033,19 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// scalar value. class VPPartialReductionRecipe : public VPSingleDefRecipe { unsigned Opcode; + unsigned ScaleFactor; SamTebbs33 wrote: Done. https://github.com/llvm/llvm-project/pull/133090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SDAG] Introduce inbounds flag for pointer arithmetic (PR #131862)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/131862 >From bd22087405bdf669eca230dcf0656dd14b290083 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 17 Mar 2025 06:51:16 -0400 Subject: [PATCH] [SDAG] Introduce inbounds flag for pointer arithmetic This patch introduces an inbounds SDNodeFlag, to show that a pointer addition SDNode implements an inbounds getelementptr operation (i.e., the pointer operand is in bounds wrt. the allocated object it is based on, and the arithmetic does not change that). The flag is set in the DAG construction when lowering inbounds GEPs. Inbounds information is useful in the ISel when selecting memory instructions that perform address computations whose intermediate steps must be in the same memory region as the final result. A follow-up patch will start using it for AMDGPU's flat memory instructions, where the immediate offset must not affect the memory aperture of the address. A similar patch for gMIR and GlobalISel will follow. For SWDEV-516125. --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h| 9 +++-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp| 3 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3 +++ .../CodeGen/X86/merge-store-partially-alias-loads.ll | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 2283f99202e2f..13ac65f5d731c 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -415,12 +415,15 @@ struct SDNodeFlags { Unpredictable = 1 << 13, // Compare instructions which may carry the samesign flag. SameSign = 1 << 14, +// Pointer arithmetic instructions that remain in bounds, e.g., implementing +// an inbounds GEP. +InBounds = 1 << 15, // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below // the class definition when adding new flags. 
PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint | -NonNeg | NoNaNs | NoInfs | SameSign, +NonNeg | NoNaNs | NoInfs | SameSign | InBounds, FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal | AllowContract | ApproximateFuncs | AllowReassociation, }; @@ -455,6 +458,7 @@ struct SDNodeFlags { void setAllowReassociation(bool b) { setFlag(b); } void setNoFPExcept(bool b) { setFlag(b); } void setUnpredictable(bool b) { setFlag(b); } + void setInBounds(bool b) { setFlag(b); } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; } @@ -472,6 +476,7 @@ struct SDNodeFlags { bool hasAllowReassociation() const { return Flags & AllowReassociation; } bool hasNoFPExcept() const { return Flags & NoFPExcept; } bool hasUnpredictable() const { return Flags & Unpredictable; } + bool hasInBounds() const { return Flags & InBounds; } bool operator==(const SDNodeFlags &Other) const { return Flags == Other.Flags; @@ -481,7 +486,7 @@ struct SDNodeFlags { }; LLVM_DECLARE_ENUM_AS_BITMASK(decltype(SDNodeFlags::None), - SDNodeFlags::SameSign); + SDNodeFlags::InBounds); inline SDNodeFlags operator|(SDNodeFlags LHS, SDNodeFlags RHS) { LHS |= RHS; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e3c34382d6354..e8336399d289f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4284,6 +4284,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (int64_t(Offset) >= 0 && NW.hasNoUnsignedSignedWrap())) Flags |= SDNodeFlags::NoUnsignedWrap; +Flags.setInBounds(NW.isInBounds()); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, DAG.getConstant(Offset, dl, N.getValueType()), Flags); @@ -4327,6 +4328,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (Offs.isNonNegative() && 
NW.hasNoUnsignedSignedWrap())) Flags.setNoUnsignedWrap(true); +Flags.setInBounds(NW.isInBounds()); OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); @@ -4389,6 +4391,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // pointer index type (add nuw). SDNodeFlags AddFlags; AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); + AddFlags.setInBounds(NW.isInBounds()); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp ind
[llvm-branch-commits] [clang-tools-extra] release/20.x: [clang-tidy] Fix broken HeaderFilterRegex when read from config file (#133582) (PR #134215)
https://github.com/carlosgalvezp updated https://github.com/llvm/llvm-project/pull/134215 >From d868cc50a9aa0884a360c20179a8a921417d867c Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Thu, 3 Apr 2025 09:28:34 +0200 Subject: [PATCH] [clang-tidy] Fix broken HeaderFilterRegex when read from config file (#133582) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR https://github.com/llvm/llvm-project/pull/91400 broke the usage of HeaderFilterRegex via config file, because it is now created at a different point in the execution and leads to a different value. The result of that is that using HeaderFilterRegex only in the config file does NOT work, in other words clang-tidy stops triggering warnings on header files, thereby losing a lot of coverage. This patch reverts the logic so that the header filter is created upon calling the getHeaderFilter() function. Additionally, this patch adds 2 unit tests to prevent regressions in the future: - One of them, "simple", tests the most basic use case with a single top-level .clang-tidy file. - The second one, "inheritance", demonstrates that the subfolder only gets warnings from headers within it, and not from parent headers. 
Fixes #118009 Fixes #121969 Fixes #133453 Co-authored-by: Carlos Gálvez (cherry picked from commit 6333fa5160fbde4bd2cf6afe8856695c13ab621f) --- .../ClangTidyDiagnosticConsumer.cpp | 36 ++- .../clang-tidy/ClangTidyDiagnosticConsumer.h | 4 +++ .../clang-tidy/ClangTidyOptions.cpp | 4 +-- clang-tools-extra/docs/ReleaseNotes.rst | 3 ++ .../inheritance/.clang-tidy | 1 + .../inheritance/foo.cpp | 3 ++ .../inheritance/foo.h | 1 + .../inheritance/subfolder/.clang-tidy | 2 ++ .../inheritance/subfolder/bar.cpp | 8 + .../inheritance/subfolder/bar.h | 1 + .../simple/.clang-tidy| 1 + .../simple/foo.cpp| 3 ++ .../simple/foo.h | 1 + 13 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/.clang-tidy create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.cpp create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.h create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/.clang-tidy create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.cpp create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.h create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/.clang-tidy create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.cpp create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.h diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp index 4c75b42270114..71e852545203e 100644 --- 
a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -311,18 +311,7 @@ ClangTidyDiagnosticConsumer::ClangTidyDiagnosticConsumer( : Context(Ctx), ExternalDiagEngine(ExternalDiagEngine), RemoveIncompatibleErrors(RemoveIncompatibleErrors), GetFixesFromNotes(GetFixesFromNotes), - EnableNolintBlocks(EnableNolintBlocks) { - - if (Context.getOptions().HeaderFilterRegex && - !Context.getOptions().HeaderFilterRegex->empty()) -HeaderFilter = -std::make_unique(*Context.getOptions().HeaderFilterRegex); - - if (Context.getOptions().ExcludeHeaderFilterRegex && - !Context.getOptions().ExcludeHeaderFilterRegex->empty()) -ExcludeHeaderFilter = std::make_unique( -*Context.getOptions().ExcludeHeaderFilterRegex); -} + EnableNolintBlocks(EnableNolintBlocks) {} void ClangTidyDiagnosticConsumer::finalizeLastError() { if (!Errors.empty()) { @@ -571,17 +560,30 @@ void ClangTidyDiagnosticConsumer::checkFilters(SourceLocation Location, } StringRef FileName(File->getName()); - LastErrorRelatesToUserCode = - LastErrorRelatesToUserCode || Sources.isInMainFile(Location) || - (HeaderFilter && - (HeaderFilter->match(FileName) && -!(ExcludeHeaderFilter && ExcludeHeaderFilter->match(FileName; + LastErrorRelatesToUserCode = LastErrorRelatesToUserCode || + Sources.isInMainFile(Location) || + (getHeade
[llvm-branch-commits] [mlir] [mlir][memref] Check memory space before lowering alloc ops (PR #134427)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Matthias Springer (matthias-springer) Changes Check the memory space before lowering allocation ops, instead of starting the lowering and then rolling back the pattern when the memory space was found to be incompatible with LLVM. Note: This is in preparation of the One-Shot Dialect Conversion refactoring. Depends on #134421. --- Full diff: https://github.com/llvm/llvm-project/pull/134427.diff 4 Files Affected: - (modified) mlir/include/mlir/Conversion/LLVMCommon/Pattern.h (+5-3) - (modified) mlir/lib/Conversion/LLVMCommon/Pattern.cpp (+6-3) - (modified) mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp (+1-6) - (modified) mlir/test/Conversion/MemRefToLLVM/invalid.mlir (+1-2) ``diff diff --git a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h index c65f7d7217be5..6f7811acec939 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h @@ -75,9 +75,11 @@ class ConvertToLLVMPattern : public ConversionPattern { ValueRange indices, ConversionPatternRewriter &rewriter) const; - /// Returns if the given memref has identity maps and the element type is - /// convertible to LLVM. - bool isConvertibleAndHasIdentityMaps(MemRefType type) const; + /// Returns if the given memref type is convertible to LLVM and has an + /// identity layout map. If `verifyMemorySpace` is set to "false", the memory + /// space of the memref type is ignored. + bool isConvertibleAndHasIdentityMaps(MemRefType type, + bool verifyMemorySpace = true) const; /// Returns the type of a pointer to an element of the memref. 
Type getElementPtrType(MemRefType type) const; diff --git a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp index 71b68619cc793..d11de1f44250c 100644 --- a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp @@ -98,10 +98,13 @@ Value ConvertToLLVMPattern::getStridedElementPtr( // Check if the MemRefType `type` is supported by the lowering. We currently // only support memrefs with identity maps. bool ConvertToLLVMPattern::isConvertibleAndHasIdentityMaps( -MemRefType type) const { - if (!typeConverter->convertType(type.getElementType())) +MemRefType type, bool verifyMemorySpace) const { + if (!type.getLayout().isIdentity()) return false; - return type.getLayout().isIdentity(); + // If the memory space should not be verified, just check the element type. + Type typeToVerify = + verifyMemorySpace ? static_cast(type) : type.getElementType(); + return static_cast(typeConverter->convertType(typeToVerify)); } Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const { diff --git a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp index c5b2e83df93dc..bad209a4ddecf 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp @@ -73,12 +73,7 @@ std::tuple AllocationOpLLVMLowering::allocateBufferManuallyAlign( MemRefType memRefType = getMemRefResultType(op); // Allocate the underlying buffer. Type elementPtrType = this->getElementPtrType(memRefType); - if (!elementPtrType) { -emitError(loc, "conversion of memref memory space ") -<< memRefType.getMemorySpace() -<< " to integer address space " - "failed. 
Consider adding memory space conversions."; - } + assert(elementPtrType && "could not compute element ptr type"); FailureOr allocFuncOp = getNotalignedAllocFn( getTypeConverter(), op->getParentWithTrait(), getIndexType()); diff --git a/mlir/test/Conversion/MemRefToLLVM/invalid.mlir b/mlir/test/Conversion/MemRefToLLVM/invalid.mlir index 61c67005a08fc..0d04bba96bcdb 100644 --- a/mlir/test/Conversion/MemRefToLLVM/invalid.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/invalid.mlir @@ -22,7 +22,7 @@ func.func @bad_address_space(%a: memref<2xindex, "foo">) { // CHECK-LABEL: @invalid_int_conversion func.func @invalid_int_conversion() { - // expected-error@+1 {{conversion of memref memory space 1 : ui64 to integer address space failed. Consider adding memory space conversions.}} + // expected-error@unknown{{conversion of memref memory space 1 : ui64 to integer address space failed. Consider adding memory space conversions.}} %alloc = memref.alloc() {alignment = 64 : i64} : memref<10xf32, 1 : ui64> return } @@ -32,7 +32,6 @@ func.func @invalid_int_conversion() { // expected-error@unknown{{conversion of memref memory space #gpu.address_space to integer address space failed. Consider adding memory space conversions}} // CHECK-LABEL: @issue_70160 func.func @issue_70160() { - // expected-error@+1
[llvm-branch-commits] [mlir] [mlir][memref] Check memory space before lowering alloc ops (PR #134427)
https://github.com/matthias-springer created https://github.com/llvm/llvm-project/pull/134427 Check the memory space before lowering allocation ops, instead of starting the lowering and then rolling back the pattern when the memory space was found to be incompatible with LLVM. Note: This is in preparation of the One-Shot Dialect Conversion refactoring. Depends on #134421. >From bd104624a51dc315b94f651271b95b8b438a8146 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 4 Apr 2025 19:59:28 +0200 Subject: [PATCH] [mlir][memref] Check memory space before lowering alloc ops --- mlir/include/mlir/Conversion/LLVMCommon/Pattern.h| 8 +--- mlir/lib/Conversion/LLVMCommon/Pattern.cpp | 9 ++--- mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp | 7 +-- mlir/test/Conversion/MemRefToLLVM/invalid.mlir | 3 +-- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h index c65f7d7217be5..6f7811acec939 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h @@ -75,9 +75,11 @@ class ConvertToLLVMPattern : public ConversionPattern { ValueRange indices, ConversionPatternRewriter &rewriter) const; - /// Returns if the given memref has identity maps and the element type is - /// convertible to LLVM. - bool isConvertibleAndHasIdentityMaps(MemRefType type) const; + /// Returns if the given memref type is convertible to LLVM and has an + /// identity layout map. If `verifyMemorySpace` is set to "false", the memory + /// space of the memref type is ignored. + bool isConvertibleAndHasIdentityMaps(MemRefType type, + bool verifyMemorySpace = true) const; /// Returns the type of a pointer to an element of the memref. 
Type getElementPtrType(MemRefType type) const; diff --git a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp index 71b68619cc793..d11de1f44250c 100644 --- a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp @@ -98,10 +98,13 @@ Value ConvertToLLVMPattern::getStridedElementPtr( // Check if the MemRefType `type` is supported by the lowering. We currently // only support memrefs with identity maps. bool ConvertToLLVMPattern::isConvertibleAndHasIdentityMaps( -MemRefType type) const { - if (!typeConverter->convertType(type.getElementType())) +MemRefType type, bool verifyMemorySpace) const { + if (!type.getLayout().isIdentity()) return false; - return type.getLayout().isIdentity(); + // If the memory space should not be verified, just check the element type. + Type typeToVerify = + verifyMemorySpace ? static_cast(type) : type.getElementType(); + return static_cast(typeConverter->convertType(typeToVerify)); } Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const { diff --git a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp index c5b2e83df93dc..bad209a4ddecf 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp @@ -73,12 +73,7 @@ std::tuple AllocationOpLLVMLowering::allocateBufferManuallyAlign( MemRefType memRefType = getMemRefResultType(op); // Allocate the underlying buffer. Type elementPtrType = this->getElementPtrType(memRefType); - if (!elementPtrType) { -emitError(loc, "conversion of memref memory space ") -<< memRefType.getMemorySpace() -<< " to integer address space " - "failed. 
Consider adding memory space conversions."; - } + assert(elementPtrType && "could not compute element ptr type"); FailureOr allocFuncOp = getNotalignedAllocFn( getTypeConverter(), op->getParentWithTrait(), getIndexType()); diff --git a/mlir/test/Conversion/MemRefToLLVM/invalid.mlir b/mlir/test/Conversion/MemRefToLLVM/invalid.mlir index 61c67005a08fc..0d04bba96bcdb 100644 --- a/mlir/test/Conversion/MemRefToLLVM/invalid.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/invalid.mlir @@ -22,7 +22,7 @@ func.func @bad_address_space(%a: memref<2xindex, "foo">) { // CHECK-LABEL: @invalid_int_conversion func.func @invalid_int_conversion() { - // expected-error@+1 {{conversion of memref memory space 1 : ui64 to integer address space failed. Consider adding memory space conversions.}} + // expected-error@unknown{{conversion of memref memory space 1 : ui64 to integer address space failed. Consider adding memory space conversions.}} %alloc = memref.alloc() {alignment = 64 : i64} : memref<10xf32, 1 : ui64> return } @@ -32,7 +32,6 @@ func.func @invalid_int_conversion() { // expected-error@unknown{{conversion of memref memory space #gpu.address_space to in
[llvm-branch-commits] [clang] [Driver] Fix link order of BareMetal toolchain object (PR #132806)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132806 >From d74ec9a3ef550fba3f7dc9c7734c9d5bb096c288 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 06:17:42 -0700 Subject: [PATCH] [Driver] Fix link order of BareMetal toolchain object The linker job in BareMetal toolchain object will be used by gnuld and lld both. However, gnuld processes the arguments in the order in which they appear on the command line, whereas there is no such restriction with lld. The previous order was: LibraryPaths -> Libraries -> LTOOptions -> LinkerInputs The new order is: LibraryPaths -> LTOOptions -> LinkerInputs -> Libraries LTO options need to be added before adding any linker inputs because file format after compile stage during LTO is bitcode which gnuld natively cannot process. Hence we will need to pass appropriate plugins before adding any bitcode file on the command line. Object files that are getting linked need to be passed before processing any libraries so that gnuld can appropriately do symbol resolution for the symbols for which no definition is provided through user code. Similar link order is also followed by other linker jobs for gnuld such as in gnutools::Linker in Gnu.cpp. This is the 3rd patch in the series of patches of merging RISCVToolchain into BareMetal toolchain object. 
RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I0e68e403c08b5687cc3346e833981f7b9f3819c4 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 32 - clang/test/Driver/aarch64-toolchain-extra.c | 2 +- clang/test/Driver/aarch64-toolchain.c | 24 +++ clang/test/Driver/arm-toolchain-extra.c | 2 +- clang/test/Driver/arm-toolchain.c | 24 +++ clang/test/Driver/baremetal-multilib.yaml | 3 +- clang/test/Driver/baremetal-sysroot.cpp | 8 ++- clang/test/Driver/baremetal.cpp | 79 + 8 files changed, 98 insertions(+), 76 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 2caf9aa7b8811..d19373490c18a 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -529,8 +529,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); - CmdArgs.push_back("-Bstatic"); if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) @@ -580,6 +578,22 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, for (const auto &LibPath : TC.getLibraryPaths()) CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-L", LibPath))); + if (D.isUsingLTO()) { +assert(!Inputs.empty() && "Must have at least one input."); +// Find the first filename InputInfo object. +auto Input = llvm::find_if( +Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); +if (Input == Inputs.end()) + // For a very rare case, all of the inputs to the linker are + // InputArg. If that happens, just use the first InputInfo. 
+ Input = Inputs.begin(); + +addLTOOptions(TC, Args, CmdArgs, Output, *Input, + D.getLTOMode() == LTOK_Thin); + } + + AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); + if (TC.ShouldLinkCXXStdlib(Args)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); @@ -600,20 +614,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("--end-group"); } - if (D.isUsingLTO()) { -assert(!Inputs.empty() && "Must have at least one input."); -// Find the first filename InputInfo object. -auto Input = llvm::find_if( -Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); -if (Input == Inputs.end()) - // For a very rare case, all of the inputs to the linker are - // InputArg. If that happens, just use the first InputInfo. - Input = Inputs.begin(); - -addLTOOptions(TC, Args, CmdArgs, Output, *Input, - D.getLTOMode() == LTOK_Thin); - } - if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) && NeedCRTs) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c index 2a930e35acd45..a0b5f2902962f 100644 --- a/clang/test/Driver/aarch64-toolchain-extra.c +++ b/clang/test/Driver/aarch64-toolchain-extra.c @@ -31,5 +31,5 @@ // C-AARCH64-BAREMETAL-NOGCC: "{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/lib/crt0.o" // C-AARCH64-BAREMETAL-NOGCC: "{{.*}}/aarch64-nogcc/{{.*}}/aarch64-none-elf/lib/crtbegin.o" // C-AARCH64-BAREMETAL-NOGCC:
[llvm-branch-commits] [clang] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object (PR #132808)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132808 >From c5f98ad8993db4dc1386e393f8218f7e44cf9bb5 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 07:04:59 -0700 Subject: [PATCH] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object RISCVToolChain object passes `--sysroot` option from clang to gnuld. Adding the support for the same in BareMetal toolchain object. This is done as a part of the effort to merge RISCVToolchain object into BareMetal toolchain object. This is the 5th patch in the series of patches for merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ie830bf6d126fea46dc225e5ef97e14349765ba07 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 3 + clang/test/Driver/aarch64-toolchain.c | 5 +- clang/test/Driver/arm-toolchain.c | 3 + clang/test/Driver/baremetal.cpp | 96 +-- 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 658c79cafa839..6c9695ca5c2cd 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -529,6 +529,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); + if (!D.SysRoot.empty()) +CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); + CmdArgs.push_back("-Bstatic"); if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c index d45be9a6ee649..eaf6585909518 100644 --- a/clang/test/Driver/aarch64-toolchain.c +++ b/clang/test/Driver/aarch64-toolchain.c @@ -17,6 +17,7 @@ // C-AARCH64-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" 
// C-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// C-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // C-AARCH64-BAREMETAL: "-Bstatic" "-EL" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" @@ -53,6 +54,7 @@ // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1" // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// CXX-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL: "-Bstatic" "-EL" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" @@ -89,7 +91,8 @@ // CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1" // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" -// CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld +// CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld" +// CXX-AARCH64-BAREMETAL-LIBCXX: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" // CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-EL" // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o" // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o" diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c index d89f77b86c23b..ac4fe8d2271fb 100644 --- a/clang/test/Driver/arm-toolchain.c +++ b/clang/test/Driver/arm-toolchain.c @@ -17,6 +17,7 @@ // C-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" // C-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" // C-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld" +// C-ARM-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_arm_gc
[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121830 >From 7013325dc42da6df36cfeaa40e88d86592055d5a Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 04:58:57 -0700 Subject: [PATCH] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object This patch conditionalise the addition of crt{begin,end}.o object files along with addition of -lgloss lib based on whether libc selected is newlib or llvm libc. Since there is no way a user can specify which libc it wants to link against, currently passing valid GCCInstallation to driver will select newlib otherwise it will default to llvm libc. Moreover, this patch makes gnuld the default linker for baremetal toolchain object. User need to pass `-fuse-ld=lld` explicitly to driver to select lld This is the 2nd patch in the series of patches of merging RISCVToolchain into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ie06dc976c306cf04ec2733bbb2d271c57d201f86 --- clang/lib/Driver/ToolChains/BareMetal.cpp | 38 - clang/lib/Driver/ToolChains/BareMetal.h | 3 +- clang/test/Driver/aarch64-toolchain-extra.c | 13 ++- clang/test/Driver/aarch64-toolchain.c | 83 +++ clang/test/Driver/arm-toolchain-extra.c | 7 ++ clang/test/Driver/arm-toolchain.c | 88 - clang/test/Driver/baremetal.cpp | 3 +- clang/test/Driver/sanitizer-ld.c| 2 +- 8 files changed, 224 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index b2e62e3d254af..2caf9aa7b8811 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -545,9 +545,31 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? 
"-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); + bool NeedCRTs = + !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); + + const char *CRTBegin, *CRTEnd; + if (NeedCRTs) { +if (!Args.hasArg(options::OPT_r)) + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o"))); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) { + auto RuntimeLib = TC.GetRuntimeLibType(Args); + switch (RuntimeLib) { + case (ToolChain::RLT_Libgcc): { +CRTBegin = "crtbegin.o"; +CRTEnd = "crtend.o"; +break; + } + case (ToolChain::RLT_CompilerRT): { +CRTBegin = +TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object); +CRTEnd = +TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object); +break; + } + } + CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTBegin))); +} } Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, @@ -570,9 +592,12 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { +CmdArgs.push_back("--start-group"); AddRunTimeLibs(TC, D, CmdArgs, Args); - CmdArgs.push_back("-lc"); +if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) + CmdArgs.push_back("-lgloss"); +CmdArgs.push_back("--end-group"); } if (D.isUsingLTO()) { @@ -588,6 +613,11 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, addLTOOptions(TC, Args, CmdArgs, Output, *Input, D.getLTOMode() == LTOK_Thin); } + + if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) && + NeedCRTs) +CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); + if (TC.getTriple().isRISCV()) CmdArgs.push_back("-X"); diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h index 2a791e7672e5e..87f173342def2 100644 --- 
a/clang/lib/Driver/ToolChains/BareMetal.h +++ b/clang/lib/Driver/ToolChains/BareMetal.h @@ -36,6 +36,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { Tool *buildStaticLibTool() const override; public: + bool hasValidGCCInstallation() const { return GCCInstallation.isValid(); } bool isBareMetal() const override { return true; } bool isCrossCompiling() const override { return true; } bool HasNativeLLVMSupport() const override { return true; } @@ -60,8 +61,6 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { return ToolChain::CST_Libcxx; } - const char *getDefaultLinker() const override { return "ld.lld"; } - void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
[llvm-branch-commits] [clang] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. (PR #132807)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/132807 >From bfba89a42967789abe53863e9dbbce84332e1596 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 24 Mar 2025 06:49:09 -0700 Subject: [PATCH] [Driver] Add option to force udnefined symbols during linking in BareMetal toolchain object. Add support for `-u` option to force defined symbols. This option is supported by both lld and gnuld. This is done as a part of the effort to merge RISCVToolchain object into BareMetal toolchain object. This is the 4th patch in the series of patches for merging RISCVToolchain object into BareMetal toolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: Ia6597c756923a77fd9c7cb9a6ae8e52a56f5457d --- clang/lib/Driver/ToolChains/BareMetal.cpp | 5 +++-- clang/test/Driver/baremetal-undefined-symbols.c | 15 +++ clang/test/Driver/riscv-args.c | 6 -- 3 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/baremetal-undefined-symbols.c delete mode 100644 clang/test/Driver/riscv-args.c diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index d19373490c18a..658c79cafa839 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -570,8 +570,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, -options::OPT_s, options::OPT_t, options::OPT_r}); + Args.addAllArgs(CmdArgs, + {options::OPT_L, options::OPT_u, options::OPT_T_Group, + options::OPT_s, options::OPT_t, options::OPT_r}); TC.AddFilePathLibArgs(Args, CmdArgs); diff --git a/clang/test/Driver/baremetal-undefined-symbols.c b/clang/test/Driver/baremetal-undefined-symbols.c new file mode 100644 index 0..0ce0db43bccad --- /dev/null +++ b/clang/test/Driver/baremetal-undefined-symbols.c @@ -0,0 +1,15 @@ +// Check the arguments are 
correctly passed + +// Make sure -T is the last with gcc-toolchain option +// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-LD %s +// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" + +// TODO: Merge this test with the above in the last patch when finally integrating riscv +// Make sure -T is the last with gcc-toolchain option +// RUN: %clang -### --target=aarch64-none-elf --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ARM-LD %s +// RUN: %clang -### --target=armv6m-none-eabi --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ARM-LD %s +// CHECK-ARM-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10" + diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c deleted file mode 100644 index cab08e5b0f811..0 --- a/clang/test/Driver/riscv-args.c +++ /dev/null @@ -1,6 +0,0 @@ -// Check the arguments are correctly passed - -// Make sure -T is the last with gcc-toolchain option -// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-LD %s -// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and XOR (PR #132382)
https://github.com/petar-avramovic created https://github.com/llvm/llvm-project/pull/132382 Uniform S1 is lowered to S32. Divergent S1 is selected as VCC(S1) instruction select will select SALU instruction based on wavesize (S32 or S64). S16 are selected as is. There are register classes for vgpr S16. Since some isel patterns check for sgpr S16 we don't lower to S32. For 32 and 64 bit types we use B32/B64 rules that cover scalar vector and pointers types. SALU B32 and B64 and VALU B32 instructions are available. Divergent B64 is lowered to B32. >From 8f5d0d72c04bd49f245c28c9118ca50aeac68bb5 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Fri, 21 Mar 2025 11:29:22 +0100 Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and XOR Uniform S1 is lowered to S32. Divergent S1 is selected as VCC(S1) instruction select will select SALU instruction based on wavesize (S32 or S64). S16 are selected as is. There are register classes for vgpr S16. Since some isel patterns check for sgpr S16 we don't lower to S32. For 32 and 64 bit types we use B32/B64 rules that cover scalar vector and pointers types. SALU B32 and B64 and VALU B32 instructions are available. Divergent B64 is lowered to B32. 
--- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 17 +++- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 10 ++- .../AMDGPU/AMDGPURegBankLegalizeRules.h | 2 + .../AMDGPU/GlobalISel/regbankselect-and.mir | 33 --- .../AMDGPU/GlobalISel/regbankselect-or.mir| 85 +-- .../AMDGPU/GlobalISel/regbankselect-xor.mir | 84 +- 6 files changed, 124 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index e4eaa01951a7f..5dbaa9488d668 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -219,11 +219,16 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, return; } case SplitTo32: { -auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg()); -auto Op2 = B.buildUnmerge(VgprRB_S32, MI.getOperand(2).getReg()); +Register Dst = MI.getOperand(0).getReg(); +LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32; +auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg()); +auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg()); unsigned Opc = MI.getOpcode(); -auto Lo = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)}); -auto Hi = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)}); +auto Flags = MI.getFlags(); +auto Lo = B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, + Flags); +auto Hi = B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, + Flags); B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi}); MI.eraseFromParent(); break; @@ -384,6 +389,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { case UniInVcc: return LLT::scalar(1); case Sgpr16: + case Vgpr16: return LLT::scalar(16); case Sgpr32: case Sgpr32Trunc: @@ -503,6 +509,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case Sgpr32AExtBoolInReg: case Sgpr32SExt: return SgprRB; + case Vgpr16: case Vgpr32: case Vgpr64: case 
VgprP0: @@ -546,6 +553,7 @@ void RegBankLegalizeHelper::applyMappingDst( case SgprP4: case SgprP5: case SgprV4S32: +case Vgpr16: case Vgpr32: case Vgpr64: case VgprP0: @@ -677,6 +685,7 @@ void RegBankLegalizeHelper::applyMappingSrc( break; } // vgpr scalars, pointers and vectors +case Vgpr16: case Vgpr32: case Vgpr64: case VgprP0: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 7959bf30ca27d..96bc969dd1f40 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -106,6 +106,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg); case DivS1: return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg); + case DivS16: +return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg); case DivS32: return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg); case DivS64: @@ -441,6 +443,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB) .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}}) .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}}) + .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}}) + .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}}) + .Uni(B32, {{SgprB32}
[llvm-branch-commits] [compiler-rt] release/20.x: [rtsan][Apple] Add interceptor for _os_nospin_lock_lock (#131034) (PR #132997)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/132997 Backport 481a55a3d9645a6bc1540d326319b78ad8ed8db1 Requested by: @wrotki >From e2c95b17d56f210927b9ec695126ae0d264c93e7 Mon Sep 17 00:00:00 2001 From: davidtrevelyan Date: Thu, 13 Mar 2025 10:18:25 + Subject: [PATCH] [rtsan][Apple] Add interceptor for _os_nospin_lock_lock (#131034) Follows the discussion here: https://github.com/llvm/llvm-project/pull/129309 Recently, the test `TestRtsan.AccessingALargeAtomicVariableDiesWhenRealtime` has been failing on newer MacOS versions, because the internal locking mechanism in `std::atomic::load` (for types `T` that are larger than the hardware lock-free limit), has changed to a function that wasn't being intercepted by rtsan. This PR introduces an interceptor for `_os_nospin_lock_lock`, which is the new internal locking mechanism. _Note: we'd probably do well to introduce interceptors for `_os_nospin_lock_unlock` (and `os_unfair_lock_unlock`) too, which also appear to have blocking implementations. This can follow in a separate PR._ (cherry picked from commit 481a55a3d9645a6bc1540d326319b78ad8ed8db1) --- .../lib/rtsan/rtsan_interceptors_posix.cpp| 11 +++ .../tests/rtsan_test_interceptors_posix.cpp | 19 +++ 2 files changed, 30 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 6816119065263..4d602a88ba9ae 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -30,6 +30,12 @@ extern "C" { typedef int32_t OSSpinLock; void OSSpinLockLock(volatile OSSpinLock *__lock); +// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but +// it's an internal implementation detail of `os/lock.c` on Darwin, and +// therefore not available in any headers. As a workaround, we forward declare +// it here, which is enough to facilitate interception of _os_nospin_lock_lock. 
+struct _os_nospin_lock_s; +using _os_nospin_lock_t = _os_nospin_lock_s *; } #endif // TARGET_OS_MAC @@ -642,6 +648,11 @@ INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { __rtsan_notify_intercepted_call("os_unfair_lock_lock"); return REAL(os_unfair_lock_lock)(lock); } + +INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { + __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); + return REAL(_os_nospin_lock_lock)(lock); +} #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 59663776366bb..75f723081c4b6 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1058,6 +1058,25 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } + +// We intercept _os_nospin_lock_lock because it's the internal +// locking mechanism for MacOS's atomic implementation for data +// types that are larger than the hardware's maximum lock-free size. +// However, it's a private implementation detail and not visible in any headers, +// so we must duplicate the required type definitions to forward declaration +// what we need here. +extern "C" { +struct _os_nospin_lock_s { + unsigned int oul_value; +}; +void _os_nospin_lock_lock(_os_nospin_lock_s *); +} +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + _os_nospin_lock_s lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} #endif #if SANITIZER_LINUX ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/132642 This patch migrates the CI over to the new compute_projects.py script for calculating what projects need to be tested based on a change to LLVM. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg (PR #132385)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/132385 >From 9f92e94f7171f9e04405dee69b0f69ea80fbeb2f Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Fri, 21 Mar 2025 13:12:11 +0100 Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg Uniform S16 shifts have to be extended to S32 using appropriate Extend before lowering to S32 instruction. Uniform packed V2S16 are lowered to SGPR S32 instructions, other option is to use VALU packed V2S16 and ReadAnyLane. For uniform S32 and S64 and divergent S16, S32, S64 and V2S16 there are instructions available. --- .../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 2 +- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 101 ++ .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 4 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 43 +++- .../AMDGPU/AMDGPURegBankLegalizeRules.h | 11 ++ llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll | 10 +- llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll | 187 +- .../AMDGPU/GlobalISel/regbankselect-ashr.mir | 6 +- .../AMDGPU/GlobalISel/regbankselect-lshr.mir | 17 +- .../GlobalISel/regbankselect-sext-inreg.mir | 24 +-- .../AMDGPU/GlobalISel/regbankselect-shl.mir | 6 +- .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll | 34 ++-- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll| 10 +- 13 files changed, 304 insertions(+), 151 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index 44f1b5419abb9..3ff5a096c42b8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -306,7 +306,7 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) { // Opcodes that support pretty much all combinations of reg banks and LLTs // (except S1). There is no point in writing rules for them. 
if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES || -Opc == AMDGPU::G_MERGE_VALUES) { +Opc == AMDGPU::G_MERGE_VALUES || Opc == G_BITCAST) { RBLHelper.applyMappingTrivial(*MI); continue; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 0f5f3545ac8eb..2182fcec22fc3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -130,6 +130,28 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, MI.eraseFromParent(); } +std::pair RegBankLegalizeHelper::unpackZExt(Register Reg) { + auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg); + auto Mask = B.buildConstant(SgprRB_S32, 0x); + auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask); + auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16)); + return {Lo.getReg(0), Hi.getReg(0)}; +} + +std::pair RegBankLegalizeHelper::unpackSExt(Register Reg) { + auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg); + auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16); + auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16)); + return {Lo.getReg(0), Hi.getReg(0)}; +} + +std::pair RegBankLegalizeHelper::unpackAExt(Register Reg) { + auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg); + auto Lo = PackedS32; + auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16)); + return {Lo.getReg(0), Hi.getReg(0)}; +} + void RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet &WaterfallSgprs) { @@ -259,6 +281,33 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, MI.eraseFromParent(); break; } + case SExtInRegSplitTo32: { +auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg()); +int Amt = MI.getOperand(2).getImm(); +Register Lo, Hi; +// Hi|Lo: s sign bit, ?/x bits changed/not changed by sign-extend +if (Amt <= 32) { + auto Freeze = 
B.buildFreeze(VgprRB_S32, Op1.getReg(0)); + if (Amt == 32) { +// Hi|Lo: |sxxx -> |sxxx +Lo = Freeze.getReg(0); + } else { +// Hi|Lo: |???s -> | +Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0); + } + + auto SignExtCst = B.buildConstant(SgprRB_S32, 31); + Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0); +} else { + // Hi|Lo: ?sxx| -> ssxx| + Lo = Op1.getReg(0); + Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0); +} + +B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi}); +MI.eraseFromParent(); +break; + } case Div_BFE: { Register Dst = MI.getOperand(0).getReg(); assert(MRI.getType(Dst) == LLT::scalar(64)); @@ -356,6 +405,37 @@ void RegBankLegalizeH
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
@@ -635,6 +635,13 @@ class CGDebugInfo { llvm::DILocation *CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg); + /// Create a debug location from `Location` that adds an artificial inline + /// frame where the frame name is FuncName + /// + /// This is used to indiciate instructions that come from compiler + /// instrumentation. + llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc Location, fmayer wrote: Done. https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [Clang] Fix a lambda pattern comparison mismatch after ecc7e6ce4 (#133863) (PR #134194)
erichkeane wrote: > @erichkeane What do you think about merging this PR to the release branch? I'm on the fence. It IS a regression that we should fix, and the new version is significantly better of an implementation than we already have, but the risk of further regression is non-zero (and on the higher end of my comfort zone). I'm definitely leaning towards "we should do this", but I'd love if we could let it bake in 'main' for more time before doing so (though it IS early in the next dot release, right? So we should have time to revert before/if the next release happens). https://github.com/llvm/llvm-project/pull/134194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [LoongArch][MC] Add relocation support for fld fst [x]vld [x]vst (PR #133836)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/133836 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)
@@ -0,0 +1,115 @@ +#include + +#define XXH_INLINE_ALL +#define XXH_NO_STDLIB +#define XXH_memcpy __builtin_memcpy +#define XXH_memset __builtin_memset +#define XXH_memcmp __builtin_memcmp +#include "../xxhash.h" + +// EmuPAC implements runtime emulation of PAC instructions. If the current +// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the +// emulation, which is effectively an implementation of PAC with an IMPDEF +// hashing scheme based on XXH128. +// +// The purpose of the emulation is to allow programs to be built to be portable +// to machines without PAC support, with some performance loss and increased +// probability of false positives (due to not being able to portably determine +// the VA size), while being functionally almost equivalent to running on a +// machine with PAC support. One example of a use case is if PAC is used in +// production as a security mitigation, but the testing environment is +// heterogeneous (i.e. some machines lack PAC support). In this case we would +// like the testing machines to be able to detect issues resulting +// from the use of PAC instructions that would affect production by running +// tests. This can be achieved by building test binaries with EmuPAC and +// production binaries with real PAC. +// +// The emulation assumes that the VA size is at most 48 bits. The architecture +// as of ARMv8.2, which was the last architecture version in which PAC was not +// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are +// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA. + +const uint64_t kMaxVASize = 48; +const uint64_t kPACMask = ((1ULL << 55) - 1) & ~((1ULL << kMaxVASize) - 1); +const uint64_t kTTBR1Mask = 1ULL << 55; + +// Determine whether PAC is supported without accessing memory. This utilizes +// the XPACLRI instruction which will copy bit 55 of x30 into at least bit 54 if +// PAC is supported and acts as a NOP if PAC is not supported. 
+static _Bool pac_supported() { + register uintptr_t x30 __asm__("x30") = 1ULL << 55; + __asm__ __volatile__("xpaclri" : "+r"(x30)); + return x30 & (1ULL << 54); +} + +// This asm snippet is used to force the creation of a frame record when +// calling the EmuPAC functions. This is important because the EmuPAC functions +// may crash if an auth failure is detected and may be unwound past using a +// frame pointer based unwinder. +#ifdef __GCC_HAVE_DWARF2_CFI_ASM +#define frame_pointer_wrap(sym) \ + "stp x29, x30, [sp, #-16]!\n" \ + ".cfi_def_cfa_offset 16\n" \ + "mov x29, sp\n" \ + ".cfi_def_cfa w29, 16\n" \ + ".cfi_offset w30, -8\n" \ + ".cfi_offset w29, -16\n" \ + "bl " #sym "\n" \ + ".cfi_def_cfa wsp, 16\n" \ + "ldp x29, x30, [sp], #16\n" \ + ".cfi_def_cfa_offset 0\n" \ + ".cfi_restore w30\n" \ + ".cfi_restore w29\n" \ + "ret" +#else +#define frame_pointer_wrap(sym) \ + "stp x29, x30, [sp, #-16]!\n" \ + "mov x29, sp\n" \ + "bl " #sym "\n" \ + "ldp x29, x30, [sp], #16\n" \ + "ret" +#endif + +uint64_t __emupac_pacda_impl(uint64_t ptr, uint64_t disc) { + if (pac_supported()) { +__asm__ __volatile__(".arch_extension pauth\npacda %0, %1" + : "+r"(ptr) + : "r"(disc)); +return ptr; + } + if (ptr & kTTBR1Mask) { +if ((ptr & kPACMask) != kPACMask) { + return ptr | kPACMask; +} + } else { +if (ptr & kPACMask) { + return ptr & ~kPACMask; +} + } + uint64_t hash = XXH3_64bits_withSeed(&ptr, 8, disc); + return (ptr & ~kPACMask) | (hash & kPACMask); +} + +__attribute__((naked)) uint64_t __emupac_pacda(uint64_t ptr, uint64_t disc) { + __asm__(frame_pointer_wrap(__emupac_pacda_impl)); +} + +uint64_t __emupac_autda_impl(uint64_t ptr, uint64_t disc) { + if (pac_supported()) { pcc wrote: The problem with using ifuncs here is that this function will itself be called from ifunc resolvers (see [this](https://discourse.llvm.org/t/rfc-structure-protection-a-family-of-uaf-mitigation-techniques/8#:~:text=support%20for%20using-,Emulated%20PAC,-to%20relocate%20ptrauth)), so that could lead 
to order of initialization issues. Since performance is not that important for this function's use case I didn't try to avoid checking every time or experiment with other calling conventions. Also, if compiler-rt needs to be buildable with non-Clang compilers, I'm not sure that we can use the other calling conventions anyway. https://github.com/llvm/llvm-project/pull/133530 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-reduce: Fix losing fast math flags in operands-to-args (PR #133421)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133421 >From 1ef46ecbad539c23e95a02dd5f0a8f9f28b5865d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Mar 2025 18:00:05 +0700 Subject: [PATCH] llvm-reduce: Fix losing fast math flags in operands-to-args --- .../operands-to-args-preserve-fmf.ll | 20 +++ .../deltas/ReduceOperandsToArgs.cpp | 4 2 files changed, 24 insertions(+) create mode 100644 llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll diff --git a/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll b/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll new file mode 100644 index 0..b4b19ca28dbb5 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll @@ -0,0 +1,20 @@ +; RUN: llvm-reduce %s -o %t --abort-on-invalid-reduction --delta-passes=operands-to-args --test FileCheck --test-arg %s --test-arg --check-prefix=INTERESTING --test-arg --input-file +; RUN: FileCheck %s --input-file %t --check-prefix=REDUCED + +; INTERESTING-LABEL: define float @callee( +; INTERESTING: fadd float +define float @callee(float %a) { + %x = fadd float %a, 1.0 + ret float %x +} + +; INTERESTING-LABEL: define float @caller( +; INTERESTING: load float + +; REDUCED-LABEL: define float @caller(ptr %ptr, float %val, float %callee.ret1) { +; REDUCED: %callee.ret12 = call nnan nsz float @callee(float %val, float 0.00e+00) +define float @caller(ptr %ptr) { + %val = load float, ptr %ptr + %callee.ret = call nnan nsz float @callee(float %val) + ret float %callee.ret +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp index 037ff15fae0f6..e7ad52eb65a5d 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" 
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -107,6 +108,9 @@ static void replaceFunctionCalls(Function *OldF, Function *NewF) { NewCI->setCallingConv(NewF->getCallingConv()); NewCI->setAttributes(CI->getAttributes()); +if (auto *FPOp = dyn_cast<FPMathOperator>(NewCI)) + NewCI->setFastMathFlags(CI->getFastMathFlags()); + // Do the replacement for this use. if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)
kasuga-fj wrote: Depends on #133664 (Sorry for the inconvenience; I tried using [Graphite](https://app.graphite.dev/), but it didn't work properly because of a network problem on my side). https://github.com/llvm/llvm-project/pull/133665 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-reduce: Fix introducing unreachable code in simplify conditionals (PR #133842)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/133842 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef VFs, // even in the scalar case. RegUsage[ClassID] += 1; } else { +// The output from scaled phis and scaled reductions actually have +// fewer lanes than the VF. +auto VF = VFs[J]; +if (auto *ReductionR = dyn_cast(R)) SamTebbs33 wrote: Yeah that's a nice idea. We could add a `VPScaledRecipe` class. I agree with doing it afterwards. https://github.com/llvm/llvm-project/pull/133090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
@@ -5026,10 +5026,24 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef VFs, // even in the scalar case. RegUsage[ClassID] += 1; } else { +// The output from scaled phis and scaled reductions actually have +// fewer lanes than the VF. +auto VF = VFs[J]; +if (auto *ReductionR = dyn_cast(R)) + VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor()); +else if (auto *PartialReductionR = + dyn_cast(R)) + VF = VF.divideCoefficientBy(PartialReductionR->getVFScaleFactor()); +LLVM_DEBUG(if (VF != VFs[J]) { SamTebbs33 wrote: Done. https://github.com/llvm/llvm-project/pull/133090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] eec5039 - Revert "[Metadata] Preserve MD_prof when merging instructions when one is mis…"
Author: Snehasish Kumar Date: 2025-04-02T22:08:33-07:00 New Revision: eec5039bb4f01dd3067fce475ff8bf65297ed4ba URL: https://github.com/llvm/llvm-project/commit/eec5039bb4f01dd3067fce475ff8bf65297ed4ba DIFF: https://github.com/llvm/llvm-project/commit/eec5039bb4f01dd3067fce475ff8bf65297ed4ba.diff LOG: Revert "[Metadata] Preserve MD_prof when merging instructions when one is mis…" This reverts commit c18994c7cdf68dfbb35c998909aa837169bb0c25. Added: Modified: llvm/lib/Transforms/Utils/Local.cpp Removed: llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index c136825d47b9c..edec0e7a94422 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3355,10 +3355,9 @@ static void combineMetadata(Instruction *K, const Instruction *J, case LLVMContext::MD_invariant_group: // Preserve !invariant.group in K. break; - // Keep empty cases for prof, mmra, memprof, and callsite to prevent them - // from being removed as unknown metadata. The actual merging is handled + // Keep empty cases for mmra, memprof, and callsite to prevent them from + // being removed as unknown metadata. The actual merging is handled // separately below. 
- case LLVMContext::MD_prof: case LLVMContext::MD_mmra: case LLVMContext::MD_memprof: case LLVMContext::MD_callsite: @@ -3387,6 +3386,10 @@ static void combineMetadata(Instruction *K, const Instruction *J, if (!AAOnly) K->setMetadata(Kind, JMD); break; + case LLVMContext::MD_prof: +if (!AAOnly && DoesKMove) + K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J)); +break; case LLVMContext::MD_noalias_addrspace: if (DoesKMove) K->setMetadata(Kind, @@ -3433,16 +3436,6 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_callsite, MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); } - - // Merge prof metadata. - // Handle separately to support cases where only one instruction has the - // metadata. - auto *JProf = J->getMetadata(LLVMContext::MD_prof); - auto *KProf = K->getMetadata(LLVMContext::MD_prof); - if (!AAOnly && (JProf || KProf)) { -K->setMetadata(LLVMContext::MD_prof, - MDNode::getMergedProfMetadata(KProf, JProf, K, J)); - } } void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, diff --git a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll deleted file mode 100644 index d6058134f5285..0 --- a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll +++ /dev/null @@ -1,62 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 -; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST - -; Test case based on C++ code with manualy annotated !prof metadata. -; This is to test that when calls to 'func1' from 'if.then' block -; and 'if.else' block are hoisted, the branch_weights are merged and -; attached to merged call rather than dropped. 
-; -; int func1(int a, int b) ; -; int func2(int a, int b) ; - -; int func(int a, int b, bool c) { -;int sum= 0; -;if(c) { -;sum += func1(a, b); -;} else { -;sum += func1(a, b); -;sum -= func2(a, b); -;} -;return sum; -; } -define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) { -; HOIST-LABEL: define i32 @_Z4funciib -; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { -; HOIST-NEXT: entry: -; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 [[B]]), !prof [[PROF0:![0-9]+]] -; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]] -; HOIST: if.else: -; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 [[B]]) -; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]] -; HOIST-NEXT:br label [[IF_END]] -; HOIST: if.end: -; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], [[ENTRY:%.*]] ] -; HOIST-NEXT:ret i32 [[SUM_0]] -; -entry: - br i1 %c, label %if.then, label %if.else - -if.then: ; preds = %entry - %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b) - br label %if.end - -if.else: ; preds = %entry - %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0 - %call3 = ta
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Fix deployment targets that were incorrectly bumped (#134278) (PR #134435)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/134435 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 63bf5d5 - Revert "[clang] add support for -Wpadded on Windows (#130182)"
Author: Alex Bradbury Date: 2025-04-03T13:12:30+01:00 New Revision: 63bf5d531c46cbc9c1571debf285fc51463f007d URL: https://github.com/llvm/llvm-project/commit/63bf5d531c46cbc9c1571debf285fc51463f007d DIFF: https://github.com/llvm/llvm-project/commit/63bf5d531c46cbc9c1571debf285fc51463f007d.diff LOG: Revert "[clang] add support for -Wpadded on Windows (#130182)" This reverts commit 76fa9530c9ac7f81a49b840556f51f4838efbfe1. Added: Modified: clang/docs/ReleaseNotes.rst clang/lib/AST/RecordLayoutBuilder.cpp Removed: clang/test/SemaCXX/windows-Wpadded-bitfield.cpp clang/test/SemaCXX/windows-Wpadded.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 47f9c3caa0e47..fdf9a246d6373 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -190,8 +190,6 @@ Modified Compiler Flags - The compiler flag `-fbracket-depth` default value is increased from 256 to 2048. (#GH94728) -- `-Wpadded` option implemented for the `x86_64-windows-msvc` target. Fixes #61702 - Removed Compiler Flags - diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 41e7198cb7581..3e756ab9b9bfe 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -2274,9 +2274,9 @@ static unsigned getPaddingDiagFromTagKind(TagTypeKind Tag) { } } -static void CheckFieldPadding(const ASTContext &Context, bool IsUnion, - uint64_t Offset, uint64_t UnpaddedOffset, - const FieldDecl *D) { +void ItaniumRecordLayoutBuilder::CheckFieldPadding( +uint64_t Offset, uint64_t UnpaddedOffset, uint64_t UnpackedOffset, +unsigned UnpackedAlign, bool isPacked, const FieldDecl *D) { // We let objc ivars without warning, objc interfaces generally are not used // for padding tricks. if (isa(D)) @@ -2300,8 +2300,7 @@ static void CheckFieldPadding(const ASTContext &Context, bool IsUnion, if (D->getIdentifier()) { auto Diagnostic = D->isBitField() ? 
diag::warn_padded_struct_bitfield : diag::warn_padded_struct_field; - Context.getDiagnostics().Report(D->getLocation(), - Diagnostic) + Diag(D->getLocation(), Diagnostic) << getPaddingDiagFromTagKind(D->getParent()->getTagKind()) << Context.getTypeDeclType(D->getParent()) << PadSize << (InBits ? 1 : 0) // (byte|bit) @@ -2309,22 +2308,15 @@ static void CheckFieldPadding(const ASTContext &Context, bool IsUnion, } else { auto Diagnostic = D->isBitField() ? diag::warn_padded_struct_anon_bitfield : diag::warn_padded_struct_anon_field; - Context.getDiagnostics().Report(D->getLocation(), - Diagnostic) + Diag(D->getLocation(), Diagnostic) << getPaddingDiagFromTagKind(D->getParent()->getTagKind()) << Context.getTypeDeclType(D->getParent()) << PadSize << (InBits ? 1 : 0); // (byte|bit) } - } -} - -void ItaniumRecordLayoutBuilder::CheckFieldPadding( -uint64_t Offset, uint64_t UnpaddedOffset, uint64_t UnpackedOffset, -unsigned UnpackedAlign, bool isPacked, const FieldDecl *D) { - ::CheckFieldPadding(Context, IsUnion, Offset, UnpaddedOffset, D); - if (isPacked && Offset != UnpackedOffset) { -HasPackedField = true; - } + } + if (isPacked && Offset != UnpackedOffset) { + HasPackedField = true; + } } static const CXXMethodDecl *computeKeyFunction(ASTContext &Context, @@ -2650,6 +2642,8 @@ struct MicrosoftRecordLayoutBuilder { /// virtual base classes and their offsets in the record. ASTRecordLayout::VBaseOffsetsMapTy VBases; /// The number of remaining bits in our last bitfield allocation. + /// This value isn't meaningful unless LastFieldIsNonZeroWidthBitfield is + /// true. 
unsigned RemainingBitsInField; bool IsUnion : 1; /// True if the last field laid out was a bitfield and was not 0 @@ -3010,15 +3004,6 @@ void MicrosoftRecordLayoutBuilder::layoutField(const FieldDecl *FD) { } else { FieldOffset = Size.alignTo(Info.Alignment); } - - uint64_t UnpaddedFielddOffsetInBits = - Context.toBits(DataSize) - RemainingBitsInField; - - ::CheckFieldPadding(Context, IsUnion, Context.toBits(FieldOffset), - UnpaddedFielddOffsetInBits, FD); - - RemainingBitsInField = 0; - placeFieldAtOffset(FieldOffset); if (!IsOverlappingEmptyField) @@ -3064,14 +3049,10 @@ void MicrosoftRecordLayoutBuilder::layoutBitField(const FieldDecl *FD) { } else { // Allocate a new block of memory and place the bitfield in it. CharUnits FieldOffset = Size.alignTo(Info.Alignment); -uint64_t UnpaddedFieldOffsetInBits = -Context.toBits(DataSize) - Remain
[llvm-branch-commits] [flang] [flang][OpenMP] Extend `do concurrent` mapping to multi-range loops (PR #127634)
https://github.com/skatrak approved this pull request. Thank you Kareem, LGTM! https://github.com/llvm/llvm-project/pull/127634 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-reduce: Fix losing call metadata in operands-to-args (PR #133422)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133422 >From 2bd8ae1562c25fbd2273d19db59e320292595786 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Mar 2025 18:01:39 +0700 Subject: [PATCH] llvm-reduce: Fix using call metadata in operands-to-args --- .../tools/llvm-reduce/operands-to-args-preserve-fmf.ll | 7 +-- llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll b/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll index b4b19ca28dbb5..fc31a08353b8f 100644 --- a/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll +++ b/llvm/test/tools/llvm-reduce/operands-to-args-preserve-fmf.ll @@ -12,9 +12,12 @@ define float @callee(float %a) { ; INTERESTING: load float ; REDUCED-LABEL: define float @caller(ptr %ptr, float %val, float %callee.ret1) { -; REDUCED: %callee.ret12 = call nnan nsz float @callee(float %val, float 0.00e+00) +; REDUCED: %callee.ret12 = call nnan nsz float @callee(float %val, float 0.00e+00), !fpmath !0 define float @caller(ptr %ptr) { %val = load float, ptr %ptr - %callee.ret = call nnan nsz float @callee(float %val) + %callee.ret = call nnan nsz float @callee(float %val), !fpmath !0 ret float %callee.ret } + +; REDUCED: !0 = !{float 2.00e+00} +!0 = !{float 2.0} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp index e7ad52eb65a5d..33f6463be6581 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp @@ -111,6 +111,8 @@ static void replaceFunctionCalls(Function *OldF, Function *NewF) { if (auto *FPOp = dyn_cast(NewCI)) NewCI->setFastMathFlags(CI->getFastMathFlags()); +NewCI->copyMetadata(*CI); + // Do the replacement for this use. 
if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits