[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
https://github.com/jh7370 approved this pull request. LGTM. Regarding the release note, you'll need one on the release branch. IIUC, you won't then need one on `main`, because the release branch is still in RC mode, so not a final release yet. https://github.com/llvm/llvm-project/pull/126367 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] bc855e1 - Revert "[mlir] Python: Parse ModuleOp from file path (#125736)"
Author: Mehdi Amini Date: 2025-02-10T09:09:23+01:00 New Revision: bc855e1ae6be47285cbbfa78fdb202593eb513fc URL: https://github.com/llvm/llvm-project/commit/bc855e1ae6be47285cbbfa78fdb202593eb513fc DIFF: https://github.com/llvm/llvm-project/commit/bc855e1ae6be47285cbbfa78fdb202593eb513fc.diff LOG: Revert "[mlir] Python: Parse ModuleOp from file path (#125736)" This reverts commit 4e14b8afb44af58ab7073bb8c0b52875599b0ae1. Added: Modified: mlir/include/mlir-c/IR.h mlir/include/mlir/Bindings/Python/Nanobind.h mlir/lib/Bindings/Python/IRCore.cpp mlir/lib/CAPI/IR/IR.cpp mlir/python/mlir/_mlir_libs/_mlir/ir.pyi mlir/test/python/ir/module.py Removed: diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index 14ccae650606af8..7d2fd89e8560fc9 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -309,10 +309,6 @@ MLIR_CAPI_EXPORTED MlirModule mlirModuleCreateEmpty(MlirLocation location); MLIR_CAPI_EXPORTED MlirModule mlirModuleCreateParse(MlirContext context, MlirStringRef module); -/// Parses a module from file and transfers ownership to the caller. -MLIR_CAPI_EXPORTED MlirModule -mlirModuleCreateParseFromFile(MlirContext context, MlirStringRef fileName); - /// Gets the context that a module was created with. MLIR_CAPI_EXPORTED MlirContext mlirModuleGetContext(MlirModule module); diff --git a/mlir/include/mlir/Bindings/Python/Nanobind.h b/mlir/include/mlir/Bindings/Python/Nanobind.h index bc8bddf4caf7e77..ca942c83d3e2fad 100644 --- a/mlir/include/mlir/Bindings/Python/Nanobind.h +++ b/mlir/include/mlir/Bindings/Python/Nanobind.h @@ -23,7 +23,6 @@ #endif #include #include -#include #include #include #include diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 2e4b6d1ce35c1b6..47a85c2a486fd46 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -6,7 +6,6 @@ // //===--===// -#include #include #include @@ -300,7 +299,7 @@ struct PyAttrBuilderMap { return *builder; } static void dunderSetItemNamed(const std::string &attributeKind, - nb::callable func, bool replace) { +nb::callable func, bool replace) { PyGlobals::get().registerAttributeBuilder(attributeKind, std::move(func), replace); } @@ -3050,19 +3049,6 @@ void mlir::python::populateIRCore(nb::module_ &m) { }, nb::arg("asm"), nb::arg("context").none() = nb::none(), kModuleParseDocstring) - .def_static( - "parse", - [](const std::filesystem::path &path, - DefaultingPyMlirContext context) { -PyMlirContext::ErrorCapture errors(context->getRef()); -MlirModule module = mlirModuleCreateParseFromFile( -context->get(), toMlirStringRef(path.string())); -if (mlirModuleIsNull(module)) - throw MLIRError("Unable to parse module assembly", errors.take()); -return PyModule::forModule(module).releaseObject(); - }, - nb::arg("asm"), nb::arg("context").none() = nb::none(), - kModuleParseDocstring) .def_static( "create", [](DefaultingPyLocation loc) { diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp index 999e8cbda1295a1..f27af0ca9a2c78b 100644 --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -22,7 +22,6 @@ #include "mlir/IR/Location.h" #include "mlir/IR/Operation.h" #include "mlir/IR/OperationSupport.h" -#include "mlir/IR/OwningOpRef.h" #include "mlir/IR/Types.h" #include "mlir/IR/Value.h" #include "mlir/IR/Verifier.h" @@ -329,15 +328,6 @@ MlirModule mlirModuleCreateParse(MlirContext context, MlirStringRef module) { return MlirModule{owning.release().getOperation()}; } -MlirModule mlirModuleCreateParseFromFile(MlirContext context, - MlirStringRef fileName) { - OwningOpRef owning = - parseSourceFile(unwrap(fileName), unwrap(context)); - if (!owning) -return MlirModule{nullptr}; - return MlirModule{owning.release().getOperation()}; -} - MlirContext mlirModuleGetContext(MlirModule module) { return wrap(unwrap(module).getContext()); } diff --git a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi index 096b87b36244368..fb7efb8cd28a5eb 100644 --- a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi +++ b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi @@ -46,7 +46,6 @@ import abc import collections from collections.abc import Callable, Sequence import io -from pathlib import Path from typing import Any, ClassVar, TypeVar, overload __all__ = [ @@ -
[llvm-branch-commits] [llvm] release/20.x: Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) (PR #126487)
https://github.com/lukel97 approved this pull request. Thanks for fixing the cherry-pick. Re: #124499, I couldn't think of a simple fix we could apply on top of e3fbf19eb4428cac03c0e7301512f11f8947d743 for the 20.x release branch. I think it's best if we cherry-pick the revert so that performance isn't impacted on 20.x, and just continue to fix the cost model stuff in-tree for 21.x. https://github.com/llvm/llvm-project/pull/126487 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [ARM] Empty structs are 1-byte for C++ ABI (#124762) (PR #125194)
https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/125194 >From f486aca315b8af06e050fab2f7d4f31675607b07 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 31 Jan 2025 09:03:01 + Subject: [PATCH 1/2] [ARM] Empty structs are 1-byte for C++ ABI (#124762) For C++ (but not C), empty structs should be passed to functions as if they are a 1 byte object with 1 byte alignment. This is defined in Arm's CPPABI32: https://github.com/ARM-software/abi-aa/blob/main/cppabi32/cppabi32.rst For the purposes of parameter passing in AAPCS32, a parameter whose type is an empty class shall be treated as if its type were an aggregate with a single member of type unsigned byte. The AArch64 equivalent of this has an exception for structs containing an array of size zero, I've kept that logic for ARM. I've not found a reason for this exception, but I've checked that GCC does have the same behaviour for ARM as it does for AArch64. The AArch64 version has an Apple ABI with different rules, which ignores empty structs in both C and C++. This is documented at https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms. The ARM equivalent of that appears to be AAPCS16_VFP, used for WatchOS, but I can't find any documentation for that ABI, so I'm not sure what rules it should follow. For now I've left it following the AArch64 Apple rules. --- clang/include/clang/Basic/LangOptions.h | 2 + clang/lib/CodeGen/Targets/ARM.cpp | 45 +++- clang/test/CodeGen/arm-empty-args.cpp | 131 3 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/arm-empty-args.cpp diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 114a5d34a008bd7..16c35bcf49339c6 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -246,6 +246,8 @@ class LangOptionsBase { /// construction vtable because it hasn't added 'type' as a substitution. /// - Skip mangling enclosing class templates of member-like friend /// function templates. +/// - Ignore empty struct arguments in C++ mode for ARM, instead of +/// passing them as if they had a size of 1 byte. Ver19, /// Conform to the underlying platform's C and C++ ABIs as closely diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index 2d858fa2f3c3a35..47e31ceeaf29431 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -71,6 +71,7 @@ class ARMABIInfo : public ABIInfo { unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; + bool shouldIgnoreEmptyArg(QualType Ty) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool containsAnyFP16Vectors(QualType Ty) const; @@ -328,6 +329,31 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align); } +bool ARMABIInfo::shouldIgnoreEmptyArg(QualType Ty) const { + uint64_t Size = getContext().getTypeSize(Ty); + assert((isEmptyRecord(getContext(), Ty, true) || Size == 0) && + "Arg is not empty"); + + // Empty records are ignored in C mode, and in C++ on WatchOS. + if (!getContext().getLangOpts().CPlusPlus || + getABIKind() == ARMABIKind::AAPCS16_VFP) +return true; + + // In C++ mode, arguments which have sizeof() == 0 are ignored. This is not a + // situation which is defined by any C++ standard or ABI, but this matches + // GCC's de facto ABI. + if (Size == 0) +return true; + + // Clang 19.0 and earlier always ignored empty struct arguments in C++ mode. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver19) +return true; + + // Otherwise, they are passed as if they have a size of 1 byte. + return false; +} + ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, unsigned functionCallConv) const { // 6.1.2.1 The following argument types are VFP CPRCs: @@ -366,9 +392,15 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } - // Ignore empty records. - if (isEmptyRecord(getContext(), Ty, true)) -return ABIArgInfo::getIgnore(); + // Empty records are either ignored completely or passed as if they were a + // 1-byte object, depending on the ABI and language standard. + if (isEmptyRecord(getContext(), Ty, true) || + getContext().getTypeSize(Ty) == 0) { +if (shouldIgnoreEmptyArg(Ty)) + return ABIArgInfo::getIgnore(); +else + return ABIArgInfo::getDirect(llvm::Type::getInt8T
[llvm-branch-commits] [llvm] release/20.x: Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) (PR #126487)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126487 Backport 5921295dcaa1ad514d79e0ee824b9df1c077a2d0 Requested by: @RKSimon >From c00fb5578488f0d482a781e7f05740c2d84d6b50 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 29 Jan 2025 22:17:53 + Subject: [PATCH] Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) Reverts llvm/llvm-project#124129 as its currently causing a regression at #124499 - avoids the regression until a proper fix can be added to getSpillCost (cherry picked from commit 5921295dcaa1ad514d79e0ee824b9df1c077a2d0) --- .../Transforms/Vectorize/SLPVectorizer.cpp| 14 +++-- .../SLPVectorizer/AArch64/loadorder.ll| 33 +- .../SLPVectorizer/AArch64/reduce-fadd.ll | 28 - .../SLPVectorizer/AMDGPU/min_max.ll | 8 +-- .../SLPVectorizer/RISCV/complex-loads.ll | 62 --- 5 files changed, 73 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2532edc5d86990e..19963e780ebd350 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -12258,12 +12258,18 @@ InstructionCost BoUpSLP::getSpillCost() const { if (auto *II = dyn_cast(I)) { if (II->isAssumeLikeIntrinsic()) return true; - IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II); + FastMathFlags FMF; + SmallVector Tys; + for (auto &ArgOp : II->args()) +Tys.push_back(ArgOp->getType()); + if (auto *FPMO = dyn_cast(II)) +FMF = FPMO->getFastMathFlags(); + IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys, + FMF); InstructionCost IntrCost = TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput); - InstructionCost CallCost = - TTI->getCallInstrCost(nullptr, II->getType(), ICA.getArgTypes(), -TTI::TCK_RecipThroughput); + InstructionCost CallCost = TTI->getCallInstrCost( + nullptr, II->getType(), Tys, TTI::TCK_RecipThroughput); if (IntrCost < CallCost) return true; } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5ad676537f9c457..9ce79e5ea356b98 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -684,27 +684,27 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT:[[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM5]] ; CHECK-NEXT:[[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1 ; CHECK-NEXT:[[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64 -; CHECK-NEXT:[[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]] -; CHECK-NEXT:[[ADD14:%.*]] = add nsw i32 [[MUL]], 2 +; CHECK-NEXT:[[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]] +; CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT:[[ADD14:%.*]] = or disjoint i32 [[MUL]], 1 ; CHECK-NEXT:[[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64 ; CHECK-NEXT:[[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]] -; CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4 ; CHECK-NEXT:[[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3 ; CHECK-NEXT:[[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64 ; CHECK-NEXT:[[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]] ; CHECK-NEXT:[[TMP2:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4 ; CHECK-NEXT:[[ADD26:%.*]] = add nsw i32 [[MUL21]], 1 ; CHECK-NEXT:[[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64 -; CHECK-NEXT:[[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]] +; CHECK-NEXT:[[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]] ; CHECK-NEXT:[[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i8, ptr [[Y:%.*]], i64 8 ; CHECK-NEXT:[[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4 ; CHECK-NEXT:[[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]] -; CHECK-NEXT:[[ARRAYIDX49:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]] -; CHECK-NEXT:[[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]] +; CHECK-NEXT:[[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]] ; CHECK-NEXT:[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT:[[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]] ; CHECK-NEXT:[[ARRAYIDX60:%.*]] = getelementptr inb
[llvm-branch-commits] [llvm] release/20.x: Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) (PR #126487)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126487 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) (PR #126487)
llvmbot wrote: @preames What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126487 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: Revert "[SLP] getSpillCost - fully populate IntrinsicCostAttributes to improve cost analysis." (#124962) (PR #126487)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport 5921295dcaa1ad514d79e0ee824b9df1c077a2d0 Requested by: @RKSimon --- Patch is 20.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126487.diff 5 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+10-4) - (modified) llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll (+18-15) - (modified) llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll (+14-14) - (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll (+4-4) - (modified) llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll (+27-35) ``diff diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2532edc5d86990e..19963e780ebd350 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -12258,12 +12258,18 @@ InstructionCost BoUpSLP::getSpillCost() const { if (auto *II = dyn_cast(I)) { if (II->isAssumeLikeIntrinsic()) return true; - IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II); + FastMathFlags FMF; + SmallVector Tys; + for (auto &ArgOp : II->args()) +Tys.push_back(ArgOp->getType()); + if (auto *FPMO = dyn_cast(II)) +FMF = FPMO->getFastMathFlags(); + IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys, + FMF); InstructionCost IntrCost = TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput); - InstructionCost CallCost = - TTI->getCallInstrCost(nullptr, II->getType(), ICA.getArgTypes(), -TTI::TCK_RecipThroughput); + InstructionCost CallCost = TTI->getCallInstrCost( + nullptr, II->getType(), Tys, TTI::TCK_RecipThroughput); if (IntrCost < CallCost) return true; } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5ad676537f9c457..9ce79e5ea356b98 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -684,27 +684,27 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT:[[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM5]] ; CHECK-NEXT:[[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1 ; CHECK-NEXT:[[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64 -; CHECK-NEXT:[[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]] -; CHECK-NEXT:[[ADD14:%.*]] = add nsw i32 [[MUL]], 2 +; CHECK-NEXT:[[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]] +; CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT:[[ADD14:%.*]] = or disjoint i32 [[MUL]], 1 ; CHECK-NEXT:[[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64 ; CHECK-NEXT:[[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]] -; CHECK-NEXT:[[TMP1:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4 ; CHECK-NEXT:[[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3 ; CHECK-NEXT:[[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64 ; CHECK-NEXT:[[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]] ; CHECK-NEXT:[[TMP2:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4 ; CHECK-NEXT:[[ADD26:%.*]] = add nsw i32 [[MUL21]], 1 ; CHECK-NEXT:[[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64 -; CHECK-NEXT:[[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]] +; CHECK-NEXT:[[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]] ; CHECK-NEXT:[[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i8, ptr [[Y:%.*]], i64 8 ; CHECK-NEXT:[[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4 ; CHECK-NEXT:[[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]] -; CHECK-NEXT:[[ARRAYIDX49:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]] -; CHECK-NEXT:[[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]] +; CHECK-NEXT:[[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]] ; CHECK-NEXT:[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT:[[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]] ; CHECK-NEXT:[[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]] ; CHECK-NEXT:[[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4 -; CHECK-NEXT:[[ARRAYIDX65:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]] +; CHECK-NEXT:[[ARRAYIDX64:%.*]] = getelementpt
[llvm-branch-commits] [lld] release/20.x: [LLD][ELF][AArch64] Discard .ARM.attributes sections (#125838) (PR #126065)
sivan-shani wrote: > @zmodem (or anyone else). If you would like to add a note about this fix in > the release notes (completely optional). Please reply to this comment with a > one or two sentence description of the fix. When you are done, please add the > release:note label to this PR. Prevents lld from accumulating AArch64 build attributes sections into object files. https://github.com/llvm/llvm-project/pull/126065 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [DSE] Don't use initializes on byval argument (#126259) (PR #126493)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport 2d31a12dbe2339d20844ede70cbb54dbaf4ceea9 Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/126493.diff 3 Files Affected: - (modified) llvm/docs/LangRef.rst (+4) - (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+3-1) - (modified) llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll (+14) ``diff diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index d004ced9dff1468..e002195cb7ed588 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1725,6 +1725,10 @@ Currently, only the following parameter attributes are defined: and negative values are allowed in case the argument points partway into an allocation. An empty list is not allowed. +On a ``byval`` argument, ``initializes`` refers to the given parts of the +callee copy being overwritten. A ``byval`` callee can never initialize the +original caller memory passed to the ``byval`` argument. + ``dead_on_unwind`` At a high level, this attribute indicates that the pointer argument is dead if the call unwinds, in the sense that the caller will not depend on the diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 13f3de07c3c44d0..0fdc3354753b183 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -2281,7 +2281,9 @@ DSEState::getInitializesArgMemLoc(const Instruction *I) { for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) { ConstantRangeList Inits; Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes); -if (InitializesAttr.isValid()) +// initializes on byval arguments refers to the callee copy, not the +// original memory the caller passed in. +if (InitializesAttr.isValid() && !CB->isByValArgument(Idx)) Inits = InitializesAttr.getValueAsConstantRangeList(); Value *CurArg = CB->getArgOperand(Idx); diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll index e590c5bf4004afd..5f8ab56c22754d4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll +++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll @@ -338,3 +338,17 @@ define i16 @global_var_alias() { ret i16 %l } +declare void @byval_fn(ptr byval(i32) initializes((0, 4)) %am) + +define void @test_byval() { +; CHECK-LABEL: @test_byval( +; CHECK-NEXT:[[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT:store i32 0, ptr [[A]], align 4 +; CHECK-NEXT:call void @byval_fn(ptr [[A]]) +; CHECK-NEXT:ret void +; + %a = alloca i32 + store i32 0, ptr %a + call void @byval_fn(ptr %a) + ret void +} `` https://github.com/llvm/llvm-project/pull/126493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstSimplify] Add additional checks when substituting pointers (#125385) (PR #125398)
https://github.com/nikic approved this pull request. https://github.com/llvm/llvm-project/pull/125398 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
AmrDeveloper wrote: > LGTM. > > Regarding the release note, you'll need one on the release branch. IIUC, you > won't then need one on `main`, because the release branch is still in RC > mode, so not a final release yet. Not sure if I can push release note on this PR only, but I will create a follow-up PR to add release note on release branch @jh7370 https://github.com/llvm/llvm-project/pull/126367 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Fix bit width handling in computeKnownBits() for GEPs (#125532) (PR #126496)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126496 Backport 3dc1ef1650c8389a6f195a474781cf2281208bed 3bd11b502c1846afa5e1257c94b7a70566e34686 Requested by: @nikic >From 9f6a0ff7b5241eb86993b1505de2776fcfebfee5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Feb 2025 17:37:07 +0100 Subject: [PATCH 1/2] [ValueTracking] Add additional tests for computeKnownBits on GEPs (NFC) These demonstrate miscompiles in the existing code. (cherry picked from commit 3dc1ef1650c8389a6f195a474781cf2281208bed) --- llvm/unittests/Analysis/ValueTrackingTest.cpp | 35 +++ 1 file changed, 35 insertions(+) diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index ee44aac45594d1b..39865fa195cf757 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2679,6 +2679,41 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAbsoluteSymbol) { EXPECT_EQ(0u, Known_0_256_Align8.countMinTrailingOnes()); } +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPExtendBeforeMul) { + // FIXME: The index should be extended before multiplying with the scale. + parseAssembly(R"( +target datalayout = "p:16:16:16" + +define void @test(i16 %arg) { + %and = and i16 %arg, u0x8000 + %base = inttoptr i16 %and to ptr + %A = getelementptr i32, ptr %base, i8 80 + ret void +} +)"); + KnownBits Known = computeKnownBits(A, M->getDataLayout()); + EXPECT_EQ(~64 & 0x7fff, Known.Zero); + EXPECT_EQ(64, Known.One); +} + +TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPOnlyIndexBits) { + // FIXME: GEP should only affect the index width. + parseAssembly(R"( +target datalayout = "p:16:16:16:8" + +define void @test(i16 %arg) { + %and = and i16 %arg, u0x8000 + %or = or i16 %and, u0x00ff + %base = inttoptr i16 %or to ptr + %A = getelementptr i8, ptr %base, i8 1 + ret void +} +)"); + KnownBits Known = computeKnownBits(A, M->getDataLayout()); + EXPECT_EQ(0x7eff, Known.Zero); + EXPECT_EQ(0x100, Known.One); +} + TEST_F(ValueTrackingTest, HaveNoCommonBitsSet) { { // Check for an inverted mask: (X & ~M) op (Y & M). >From d8ea75d51aec5c7232db9613aea8454ac2f824ec Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Feb 2025 14:29:58 +0100 Subject: [PATCH 2/2] [ValueTracking] Fix bit width handling in computeKnownBits() for GEPs (#125532) For GEPs, we have three bit widths involved: The pointer bit width, the index bit width, and the bit width of the GEP operands. The correct behavior here is: * We need to sextOrTrunc the GEP operand to the index width *before* multiplying by the scale. * If the index width and pointer width differ, GEP only ever modifies the low bits. Adds should not overflow into the high bits. I'm testing this via unit tests because it's a bit tricky to test in IR with InstCombine canonicalization getting in the way. (cherry picked from commit 3bd11b502c1846afa5e1257c94b7a70566e34686) --- llvm/lib/Analysis/ValueTracking.cpp | 66 ++- llvm/unittests/Analysis/ValueTrackingTest.cpp | 12 ++-- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b63a0a07f7de292..8a674914641a85c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1445,7 +1445,22 @@ static void computeKnownBitsFromOperator(const Operator *I, computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); // Accumulate the constant indices in a separate variable // to minimize the number of calls to computeForAddSub. -APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true); +unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(I->getType()); +APInt AccConstIndices(IndexWidth, 0); + +auto AddIndexToKnown = [&](KnownBits IndexBits) { + if (IndexWidth == BitWidth) { +// Note that inbounds does *not* guarantee nsw for the addition, as only +// the offset is signed, while the base address is unsigned. +Known = KnownBits::add(Known, IndexBits); + } else { +// If the index width is smaller than the pointer width, only add the +// value to the low bits. +assert(IndexWidth < BitWidth && + "Index width can't be larger than pointer width"); +Known.insertBits(KnownBits::add(Known.trunc(IndexWidth), IndexBits), 0); + } +}; gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { @@ -1483,43 +1498,34 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } - unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); - KnownBits IndexBits(IndexBitWidth); - computeKnownBits(Index, IndexBits, Depth + 1, Q); - TypeSize IndexTypeSize = GTI.getSe
[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Fix bit width handling in computeKnownBits() for GEPs (#125532) (PR #126496)
llvmbot wrote: @dtcxzyw What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Fix bit width handling in computeKnownBits() for GEPs (#125532) (PR #126496)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Fix bit width handling in computeKnownBits() for GEPs (#125532) (PR #126496)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: None (llvmbot) Changes Backport 3dc1ef1650c8389a6f195a474781cf2281208bed 3bd11b502c1846afa5e1257c94b7a70566e34686 Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/126496.diff 2 Files Affected: - (modified) llvm/lib/Analysis/ValueTracking.cpp (+36-30) - (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+35) ``diff diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b63a0a07f7de292..8a674914641a85c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1445,7 +1445,22 @@ static void computeKnownBitsFromOperator(const Operator *I, computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); // Accumulate the constant indices in a separate variable // to minimize the number of calls to computeForAddSub. -APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true); +unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(I->getType()); +APInt AccConstIndices(IndexWidth, 0); + +auto AddIndexToKnown = [&](KnownBits IndexBits) { + if (IndexWidth == BitWidth) { +// Note that inbounds does *not* guarantee nsw for the addition, as only +// the offset is signed, while the base address is unsigned. +Known = KnownBits::add(Known, IndexBits); + } else { +// If the index width is smaller than the pointer width, only add the +// value to the low bits. +assert(IndexWidth < BitWidth && + "Index width can't be larger than pointer width"); +Known.insertBits(KnownBits::add(Known.trunc(IndexWidth), IndexBits), 0); + } +}; gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { @@ -1483,43 +1498,34 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } - unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); - KnownBits IndexBits(IndexBitWidth); - computeKnownBits(Index, IndexBits, Depth + 1, Q); - TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL); - uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue(); - KnownBits ScalingFactor(IndexBitWidth); + TypeSize Stride = GTI.getSequentialElementStride(Q.DL); + uint64_t StrideInBytes = Stride.getKnownMinValue(); + if (!Stride.isScalable()) { +// Fast path for constant offset. +if (auto *CI = dyn_cast(Index)) { + AccConstIndices += + CI->getValue().sextOrTrunc(IndexWidth) * StrideInBytes; + continue; +} + } + + KnownBits IndexBits = + computeKnownBits(Index, Depth + 1, Q).sextOrTrunc(IndexWidth); + KnownBits ScalingFactor(IndexWidth); // Multiply by current sizeof type. // &A[i] == A + i * sizeof(*A[i]). - if (IndexTypeSize.isScalable()) { + if (Stride.isScalable()) { // For scalable types the only thing we know about sizeof is // that this is a multiple of the minimum size. -ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes)); - } else if (IndexBits.isConstant()) { -APInt IndexConst = IndexBits.getConstant(); -APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes); -IndexConst *= ScalingFactor; -AccConstIndices += IndexConst.sextOrTrunc(BitWidth); -continue; +ScalingFactor.Zero.setLowBits(llvm::countr_zero(StrideInBytes)); } else { ScalingFactor = -KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes)); +KnownBits::makeConstant(APInt(IndexWidth, StrideInBytes)); } - IndexBits = KnownBits::mul(IndexBits, ScalingFactor); - - // If the offsets have a different width from the pointer, according - // to the language reference we need to sign-extend or truncate them - // to the width of the pointer. - IndexBits = IndexBits.sextOrTrunc(BitWidth); - - // Note that inbounds does *not* guarantee nsw for the addition, as only - // the offset is signed, while the base address is unsigned. - Known = KnownBits::add(Known, IndexBits); -} -if (!Known.isUnknown() && !AccConstIndices.isZero()) { - KnownBits Index = KnownBits::makeConstant(AccConstIndices); - Known = KnownBits::add(Known, Index); + AddIndexToKnown(KnownBits::mul(IndexBits, ScalingFactor)); } +if (!Known.isUnknown() && !AccConstIndices.isZero()) + AddIndexToKnown(KnownBits::makeConstant(AccConstIndices)); break; } case Instruction::PHI: { diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index ee44aac45594d1b..50e5e0e6b2ff5b9 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2679,6 +2679,41 @@ TEST_F(ComputeKno
[llvm-branch-commits] [llvm] release/20.x: [DSE] Don't use initializes on byval argument (#126259) (PR #126493)
llvmbot wrote: @dtcxzyw What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [DSE] Don't use initializes on byval argument (#126259) (PR #126493)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126493 Backport 2d31a12dbe2339d20844ede70cbb54dbaf4ceea9 Requested by: @nikic >From 50830ff14060ff6ff8a3c6062aff2a5b835c4d33 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 10 Feb 2025 10:34:03 +0100 Subject: [PATCH] [DSE] Don't use initializes on byval argument (#126259) There are two ways we can fix this problem, depending on how the semantics of byval and initializes should interact: * Don't infer initializes on byval arguments. initializes on byval refers to the original caller memory (or having both attributes is made a verifier error). * Infer initializes on byval, but don't use it in DSE. initializes on byval refers to the callee copy. This matches the semantics of readonly on byval. This is slightly more powerful, for example, we could do a backend optimization where byval + initializes will allocate the full size of byval on the stack but not copy over the parts covered by initializes. I went with the second variant here, skipping byval + initializes in DSE (FunctionAttrs already doesn't propagate initializes past byval). I'm open to going in the other direction though. Fixes https://github.com/llvm/llvm-project/issues/126181. (cherry picked from commit 2d31a12dbe2339d20844ede70cbb54dbaf4ceea9) --- llvm/docs/LangRef.rst | 4 .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 +++- .../DeadStoreElimination/inter-procedural.ll | 14 ++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index d004ced9dff1468..e002195cb7ed588 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1725,6 +1725,10 @@ Currently, only the following parameter attributes are defined: and negative values are allowed in case the argument points partway into an allocation. An empty list is not allowed. +On a ``byval`` argument, ``initializes`` refers to the given parts of the +callee copy being overwritten. A ``byval`` callee can never initialize the +original caller memory passed to the ``byval`` argument. + ``dead_on_unwind`` At a high level, this attribute indicates that the pointer argument is dead if the call unwinds, in the sense that the caller will not depend on the diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 13f3de07c3c44d0..0fdc3354753b183 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -2281,7 +2281,9 @@ DSEState::getInitializesArgMemLoc(const Instruction *I) { for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) { ConstantRangeList Inits; Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes); -if (InitializesAttr.isValid()) +// initializes on byval arguments refers to the callee copy, not the +// original memory the caller passed in. +if (InitializesAttr.isValid() && !CB->isByValArgument(Idx)) Inits = InitializesAttr.getValueAsConstantRangeList(); Value *CurArg = CB->getArgOperand(Idx); diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll index e590c5bf4004afd..5f8ab56c22754d4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll +++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll @@ -338,3 +338,17 @@ define i16 @global_var_alias() { ret i16 %l } +declare void @byval_fn(ptr byval(i32) initializes((0, 4)) %am) + +define void @test_byval() { +; CHECK-LABEL: @test_byval( +; CHECK-NEXT:[[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT:store i32 0, ptr [[A]], align 4 +; CHECK-NEXT:call void @byval_fn(ptr [[A]]) +; CHECK-NEXT:ret void +; + %a = alloca i32 + store i32 0, ptr %a + call void @byval_fn(ptr %a) + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [DSE] Don't use initializes on byval argument (#126259) (PR #126493)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [ScalarEvolution] Handle addrec incoming value in isImpliedViaMerge() (#126236) (PR #126492)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126492 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [ScalarEvolution] Handle addrec incoming value in isImpliedViaMerge() (#126236) (PR #126492)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126492 Backport ae08969a2068dd327fbf4d0f606550574fbb9e45 7aed53eb1982113e825534f0f66d0a0e46e7a5ed Requested by: @nikic >From 4b9b2830f11c3d3e397d150164b1525bdefa75ae Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 7 Feb 2025 12:41:06 +0100 Subject: [PATCH 1/2] [IndVars] Add test for #126012 (NFC) (cherry picked from commit ae08969a2068dd327fbf4d0f606550574fbb9e45) --- .../Transforms/IndVarSimplify/pr126012.ll | 49 +++ 1 file changed, 49 insertions(+) create mode 100644 llvm/test/Transforms/IndVarSimplify/pr126012.ll diff --git a/llvm/test/Transforms/IndVarSimplify/pr126012.ll b/llvm/test/Transforms/IndVarSimplify/pr126012.ll new file mode 100644 index 000..725ea89b8e65189 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pr126012.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=indvars < %s | FileCheck %s + +; FIXME: This is a miscompile. +define i32 @test() { +; CHECK-LABEL: define i32 @test() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT:br label %[[FOR_PREHEADER:.*]] +; CHECK: [[FOR_PREHEADER]]: +; CHECK-NEXT:[[INDVAR1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PHI:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT:[[INDVAR3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT:[[COND1:%.*]] = icmp eq i32 [[INDVAR3]], 0 +; CHECK-NEXT:br i1 [[COND1]], label %[[FOR_INC]], label %[[FOR_END:.*]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT:[[EXT:%.*]] = zext i1 true to i32 +; CHECK-NEXT:br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT:[[PHI]] = phi i32 [ [[EXT]], %[[FOR_END]] ], [ 0, %[[FOR_PREHEADER]] ] +; CHECK-NEXT:[[INC]] = add nuw nsw i32 [[INDVAR3]], 1 +; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i32 [[INDVAR3]], 2 +; CHECK-NEXT:br i1 [[EXITCOND]], label %[[FOR_EXIT:.*]], label %[[FOR_PREHEADER]] +; CHECK: [[FOR_EXIT]]: +; CHECK-NEXT:[[INDVAR1_LCSSA:%.*]] = phi i32 [ [[INDVAR1]], %[[FOR_INC]] ] +; CHECK-NEXT:ret i32 [[INDVAR1_LCSSA]] +; +entry: + br label %for.preheader + +for.preheader: + %indvar1 = phi i32 [ 0, %entry ], [ %phi, %for.inc ] + %indvar2 = phi i32 [ 1, %entry ], [ %indvar3, %for.inc ] + %indvar3 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cond1 = icmp eq i32 %indvar3, 0 + br i1 %cond1, label %for.inc, label %for.end + +for.end: + %cmp = icmp sgt i32 %indvar2, 0 + %ext = zext i1 %cmp to i32 + br label %for.inc + +for.inc: + %phi = phi i32 [ %ext, %for.end ], [ 0, %for.preheader ] + %inc = add i32 %indvar3, 1 + %exitcond = icmp eq i32 %indvar3, 2 + br i1 %exitcond, label %for.exit, label %for.preheader + +for.exit: + ret i32 %indvar1 +} >From 5d5f8d3b9273236ce73bf547f7f80d41be93fe62 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 10 Feb 2025 10:07:21 +0100 Subject: [PATCH 2/2] [ScalarEvolution] Handle addrec incoming value in isImpliedViaMerge() (#126236) The code already guards against values coming from a previous iteration using properlyDominates(). However, addrecs are considered to properly dominate the loop they are defined in. Handle this special case separately, by checking for expressions that have computable loop evolution (this should cover cases like a zext of an addrec as well). I considered changing the definition of properlyDominates() instead, but decided against it. The current definition is useful in other context, e.g. when deciding whether an expression is safe to expand in a given block. Fixes https://github.com/llvm/llvm-project/issues/126012. (cherry picked from commit 7aed53eb1982113e825534f0f66d0a0e46e7a5ed) --- llvm/lib/Analysis/ScalarEvolution.cpp | 6 ++ llvm/test/Transforms/IndVarSimplify/pr126012.ll | 10 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 2ce40877b523e10..c71202c8dd58e4d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12402,6 +12402,12 @@ bool ScalarEvolution::isImpliedViaMerge(CmpPredicate Pred, const SCEV *LHS, // iteration of a loop. if (!properlyDominates(L, LBB)) return false; + // Addrecs are considered to properly dominate their loop, so are missed + // by the previous check. Discard any values that have computable + // evolution in this loop. + if (auto *Loop = LI.getLoopFor(LBB)) +if (hasComputableLoopEvolution(L, Loop)) + return false; if (!ProvedEasily(L, RHS)) return false; } diff --git a/llvm/test/Transforms/IndVarSimplify/pr126012.ll b/llvm/test/Transforms/IndVarSimplify/pr126012.ll index 725ea89b8e65189..5189fe020dd3bfd 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr126012.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr126012.ll @
[llvm-branch-commits] [llvm] release/20.x: [ScalarEvolution] Handle addrec incoming value in isImpliedViaMerge() (#126236) (PR #126492)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: None (llvmbot) Changes Backport ae08969a2068dd327fbf4d0f606550574fbb9e45 7aed53eb1982113e825534f0f66d0a0e46e7a5ed Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/126492.diff 2 Files Affected: - (modified) llvm/lib/Analysis/ScalarEvolution.cpp (+6) - (added) llvm/test/Transforms/IndVarSimplify/pr126012.ll (+53) ``diff diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 2ce40877b523e10..c71202c8dd58e4d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12402,6 +12402,12 @@ bool ScalarEvolution::isImpliedViaMerge(CmpPredicate Pred, const SCEV *LHS, // iteration of a loop. if (!properlyDominates(L, LBB)) return false; + // Addrecs are considered to properly dominate their loop, so are missed + // by the previous check. Discard any values that have computable + // evolution in this loop. + if (auto *Loop = LI.getLoopFor(LBB)) +if (hasComputableLoopEvolution(L, Loop)) + return false; if (!ProvedEasily(L, RHS)) return false; } diff --git a/llvm/test/Transforms/IndVarSimplify/pr126012.ll b/llvm/test/Transforms/IndVarSimplify/pr126012.ll new file mode 100644 index 000..5189fe020dd3bfd --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pr126012.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=indvars < %s | FileCheck %s + +; Do not infer that %cmp is true. The %indvar3 input of %indvar2 comes from +; a previous iteration, so we should not compare it to a value from the current +; iteration. +define i32 @test() { +; CHECK-LABEL: define i32 @test() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT:br label %[[FOR_PREHEADER:.*]] +; CHECK: [[FOR_PREHEADER]]: +; CHECK-NEXT:[[INDVAR1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PHI:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT:[[INDVAR2:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[INDVAR3:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT:[[INDVAR3]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT:[[COND1:%.*]] = icmp eq i32 [[INDVAR3]], 0 +; CHECK-NEXT:br i1 [[COND1]], label %[[FOR_INC]], label %[[FOR_END:.*]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i32 [[INDVAR2]], 0 +; CHECK-NEXT:[[EXT:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT:br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT:[[PHI]] = phi i32 [ [[EXT]], %[[FOR_END]] ], [ 0, %[[FOR_PREHEADER]] ] +; CHECK-NEXT:[[INC]] = add nuw nsw i32 [[INDVAR3]], 1 +; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i32 [[INDVAR3]], 2 +; CHECK-NEXT:br i1 [[EXITCOND]], label %[[FOR_EXIT:.*]], label %[[FOR_PREHEADER]] +; CHECK: [[FOR_EXIT]]: +; CHECK-NEXT:[[INDVAR1_LCSSA:%.*]] = phi i32 [ [[INDVAR1]], %[[FOR_INC]] ] +; CHECK-NEXT:ret i32 [[INDVAR1_LCSSA]] +; +entry: + br label %for.preheader + +for.preheader: + %indvar1 = phi i32 [ 0, %entry ], [ %phi, %for.inc ] + %indvar2 = phi i32 [ 1, %entry ], [ %indvar3, %for.inc ] + %indvar3 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cond1 = icmp eq i32 %indvar3, 0 + br i1 %cond1, label %for.inc, label %for.end + +for.end: + %cmp = icmp sgt i32 %indvar2, 0 + %ext = zext i1 %cmp to i32 + br label %for.inc + +for.inc: + %phi = phi i32 [ %ext, %for.end ], [ 0, %for.preheader ] + %inc = add i32 %indvar3, 1 + %exitcond = icmp eq i32 %indvar3, 2 + br i1 %exitcond, label %for.exit, label %for.preheader + +for.exit: + ret i32 %indvar1 +} `` https://github.com/llvm/llvm-project/pull/126492 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [ScalarEvolution] Handle addrec incoming value in isImpliedViaMerge() (#126236) (PR #126492)
llvmbot wrote: @dtcxzyw What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126492 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] d7bbf29 - Revert "[LinkerWrapper] Clean up options after proper forwarding (#126297)"
Author: Jan Patrick Lehr Date: 2025-02-10T10:48:00+01:00 New Revision: d7bbf2979f8f1f982094b52a4b1cea6147050aab URL: https://github.com/llvm/llvm-project/commit/d7bbf2979f8f1f982094b52a4b1cea6147050aab DIFF: https://github.com/llvm/llvm-project/commit/d7bbf2979f8f1f982094b52a4b1cea6147050aab.diff LOG: Revert "[LinkerWrapper] Clean up options after proper forwarding (#126297)" This reverts commit addbb4448487717283d334e48c63868d6f8553be. Added: Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/linker-wrapper.c clang/test/Driver/openmp-offload.c clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 82f4cabd620d77f..ea376ac00d9108b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9252,14 +9252,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, for (StringRef Arg : LinkerArgs) CmdArgs.push_back(Args.MakeArgString( "--device-linker=" + TC->getTripleString() + "=" + Arg)); - - // Forward the LTO mode relying on the Driver's parsing. - if (C.getDriver().getOffloadLTOMode() == LTOK_Full) -CmdArgs.push_back(Args.MakeArgString( -"--device-compiler=" + TC->getTripleString() + "=-flto=full")); - else if (C.getDriver().getOffloadLTOMode() == LTOK_Thin) -CmdArgs.push_back(Args.MakeArgString( -"--device-compiler=" + TC->getTripleString() + "=-flto=thin")); } } @@ -9267,9 +9259,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("--host-triple=" + getToolChain().getTripleString())); if (Args.hasArg(options::OPT_v)) CmdArgs.push_back("--wrapper-verbose"); - if (Arg *A = Args.getLastArg(options::OPT_cuda_path_EQ)) -CmdArgs.push_back( -Args.MakeArgString(Twine("--cuda-path=") + A->getValue())); // Construct the link job so we can wrap around it. Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index e7b7af7bdfbf32d..f416ee5f4463bcc 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -21,16 +21,16 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK -// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o +// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-compiler=-g \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: clang{{.*}} --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}-g +// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -39,16 +39,16 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-compiler=--save-temps \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --link
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix op mask detection in performZExtDeinterleaveShuffleCombine (#126054) (PR #126263)
https://github.com/SamTebbs33 approved this pull request. It makes sense to merge this as it fixes a micompilation. https://github.com/llvm/llvm-project/pull/126263 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix op mask detection in performZExtDeinterleaveShuffleCombine (#126054) (PR #126263)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/126263 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
https://github.com/zmodem approved this pull request. > @zmodem What do you think about merging this PR to the release branch? I think that sounds like a good idea :-) https://github.com/llvm/llvm-project/pull/126518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
Meinersbur wrote: > * build `../llvm` and install into `$PREFIX` > * build `../runtimes` (using > `-DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind"`) and install into > `$PREFIX` > * build `../clang` (while reusing the bits from llvm/libcxx in `$PREFIX`) and > install into `$PREFIX` > * build `../compiler-rt` (while reusing the bits from clang in `$PREFIX`) and > install into `$PREFIX` > * build `../mlir` (while reusing the bits from llvm in `$PREFIX`) and install > into `$PREFIX` > * build `../flang` (while reusing all the other bits in `$PREFIX`) and > install into `$PREFIX` > * build `../flang-rt` Flang-RT also needs to be installed into $PREFIX, and Flang invoked from `${PREFIX}/bin/flang`. Flang is not able to guess flang-rt's build. > If you want I can work out a full script that does this, but this is why we > might arrive in a different place, conceptually speaking. Yes, that would be helpful. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Replace SELRMux with COPY in case of identical operands. (#125108) (PR #125236)
https://github.com/uweigand approved this pull request. Sorry for the delay, I've been travelling. This looks good to backport to me. Thanks! https://github.com/llvm/llvm-project/pull/125236 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/126503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/126503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Make benchmarks dry-run by default on the release branch (PR #126441)
https://github.com/rorth milestoned https://github.com/llvm/llvm-project/pull/126441 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [MLIR][OpenMP] Add Lowering support for OpenMP Declare Mapper directive (PR #117046)
@@ -2612,7 +2612,52 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) { - TODO(converter.getCurrentLocation(), "OpenMPDeclareMapperConstruct"); + mlir::Location loc = converter.genLocation(declareMapperConstruct.source); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + lower::StatementContext stmtCtx; + const auto &spec = + std::get(declareMapperConstruct.t); + const auto &mapperName{std::get>(spec.t)}; + const auto &varType{std::get(spec.t)}; + const auto &varName{std::get(spec.t)}; + assert(varType.declTypeSpec->category() == + semantics::DeclTypeSpec::Category::TypeDerived && + "Expected derived type"); + + std::string mapperNameStr; + if (mapperName.has_value()) { +mapperNameStr = mapperName->ToString(); +mapperNameStr = +converter.mangleName(mapperNameStr, mapperName->symbol->owner()); + } else { +mapperNameStr = +varType.declTypeSpec->derivedTypeSpec().name().ToString() + ".default"; +mapperNameStr = converter.mangleName( +mapperNameStr, *varType.declTypeSpec->derivedTypeSpec().GetScope()); + } + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); TIFitis wrote: `offload/test/offloading/fortran/target-custom-mapper.f90` uses a mapper within a mapper. Let me know if that's the scenario you were referring to here. https://github.com/llvm/llvm-project/pull/117046 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) (PR #126535)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126535 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) (PR #126535)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126535 Backport 783275eb7b3ecde63bdb6ac1316c090bfc568bdd Requested by: @nico >From b849d1483fbc499ccffad768e10b375e0a49c5e1 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 10 Feb 2025 10:57:22 -0500 Subject: [PATCH] [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) We now process all 6 options left-to-right and pick whatever is active at the end. Fixes #124868. (cherry picked from commit 783275eb7b3ecde63bdb6ac1316c090bfc568bdd) --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 48 +++--- clang/test/Driver/clang_wrapv_opts.c | 24 +-- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 4ed4dbc1a8d1bc9..ae635fb6a18079e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3098,21 +3098,39 @@ bool tools::shouldRecordCommandLine(const ToolChain &TC, void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, ArgStringList &CmdArgs) { - // -fno-strict-overflow implies -fwrapv if it isn't disabled, but - // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. - bool StrictOverflow = Args.hasFlag(options::OPT_fstrict_overflow, - options::OPT_fno_strict_overflow, true); - if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { -if (A->getOption().matches(options::OPT_fwrapv)) - CmdArgs.push_back("-fwrapv"); - } else if (!StrictOverflow) { -CmdArgs.push_back("-fwrapv"); + bool use_fwrapv = false; + bool use_fwrapv_pointer = false; + for (const Arg *A : Args.filtered( + options::OPT_fstrict_overflow, options::OPT_fno_strict_overflow, + options::OPT_fwrapv, options::OPT_fno_wrapv, + options::OPT_fwrapv_pointer, options::OPT_fno_wrapv_pointer)) { +A->claim(); +switch (A->getOption().getID()) { +case options::OPT_fstrict_overflow: + use_fwrapv = false; + use_fwrapv_pointer = false; + break; +case options::OPT_fno_strict_overflow: + use_fwrapv = true; + use_fwrapv_pointer = true; + break; +case options::OPT_fwrapv: + use_fwrapv = true; + break; +case options::OPT_fno_wrapv: + use_fwrapv = false; + break; +case options::OPT_fwrapv_pointer: + use_fwrapv_pointer = true; + break; +case options::OPT_fno_wrapv_pointer: + use_fwrapv_pointer = false; + break; +} } - if (Arg *A = Args.getLastArg(options::OPT_fwrapv_pointer, - options::OPT_fno_wrapv_pointer)) { -if (A->getOption().matches(options::OPT_fwrapv_pointer)) - CmdArgs.push_back("-fwrapv-pointer"); - } else if (!StrictOverflow) { + + if (use_fwrapv) +CmdArgs.push_back("-fwrapv"); + if (use_fwrapv_pointer) CmdArgs.push_back("-fwrapv-pointer"); - } } diff --git a/clang/test/Driver/clang_wrapv_opts.c b/clang/test/Driver/clang_wrapv_opts.c index 9f3a884324dcddd..295d8deb0d99d46 100644 --- a/clang/test/Driver/clang_wrapv_opts.c +++ b/clang/test/Driver/clang_wrapv_opts.c @@ -1,20 +1,20 @@ // RUN: %clang -### -S -fwrapv -fno-wrapv -fwrapv -Werror %s 2>&1 | FileCheck -check-prefix=CHECK1 %s // CHECK1: "-fwrapv" -// + // RUN: %clang -### -S -fwrapv-pointer -fno-wrapv-pointer -fwrapv-pointer -Werror %s 2>&1 | FileCheck -check-prefix=CHECK1-POINTER %s // CHECK1-POINTER: "-fwrapv-pointer" -// + // RUN: %clang -### -S -fstrict-overflow -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK2 %s // CHECK2: "-fwrapv"{{.*}}"-fwrapv-pointer" -// + // RUN: %clang -### -S -fwrapv -fstrict-overflow -Werror -Werror %s 2>&1 | FileCheck -check-prefix=CHECK3 %s --implicit-check-not="-fwrapv-pointer" -// CHECK3: "-fwrapv" -// +// CHECK3-NOT: "-fwrapv" + // RUN: %clang -### -S -fwrapv-pointer -fstrict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK3-POINTER %s --implicit-check-not="-fwrapv" -// CHECK3-POINTER: "-fwrapv-pointer" -// -// RUN: %clang -### -S -fno-wrapv -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4 %s --implicit-check-not="-fwrapv" -// CHECK4: "-fwrapv-pointer" -// -// RUN: %clang -### -S -fno-wrapv-pointer -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4-POINTER %s --implicit-check-not="-fwrapv-pointer" -// CHECK4-POINTER: "-fwrapv" +// CHECK3-POINTER-NOT: "-fwrapv-pointer" + +// RUN: %clang -### -S -fno-wrapv -fno-strict-overflow -fno-wrapv-pointer -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4 %s --implicit-check-not="-fwrapv-pointer" +// CHECK4: "-fwrapv" + +// RUN: %clang -### -S -fno-wrapv-pointer -fno-strict-overflow -fno-wrapv -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4-POINTER %s --implicit-check-not="-fwrapv" +// CHECK4-POINTE
[llvm-branch-commits] [clang] release/20.x: [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) (PR #126535)
llvmbot wrote: @llvm/pr-subscribers-clang-driver Author: None (llvmbot) Changes Backport 783275eb7b3ecde63bdb6ac1316c090bfc568bdd Requested by: @nico --- Full diff: https://github.com/llvm/llvm-project/pull/126535.diff 2 Files Affected: - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+33-15) - (modified) clang/test/Driver/clang_wrapv_opts.c (+12-12) ``diff diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 4ed4dbc1a8d1bc9..ae635fb6a18079e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3098,21 +3098,39 @@ bool tools::shouldRecordCommandLine(const ToolChain &TC, void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, ArgStringList &CmdArgs) { - // -fno-strict-overflow implies -fwrapv if it isn't disabled, but - // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. - bool StrictOverflow = Args.hasFlag(options::OPT_fstrict_overflow, - options::OPT_fno_strict_overflow, true); - if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { -if (A->getOption().matches(options::OPT_fwrapv)) - CmdArgs.push_back("-fwrapv"); - } else if (!StrictOverflow) { -CmdArgs.push_back("-fwrapv"); + bool use_fwrapv = false; + bool use_fwrapv_pointer = false; + for (const Arg *A : Args.filtered( + options::OPT_fstrict_overflow, options::OPT_fno_strict_overflow, + options::OPT_fwrapv, options::OPT_fno_wrapv, + options::OPT_fwrapv_pointer, options::OPT_fno_wrapv_pointer)) { +A->claim(); +switch (A->getOption().getID()) { +case options::OPT_fstrict_overflow: + use_fwrapv = false; + use_fwrapv_pointer = false; + break; +case options::OPT_fno_strict_overflow: + use_fwrapv = true; + use_fwrapv_pointer = true; + break; +case options::OPT_fwrapv: + use_fwrapv = true; + break; +case options::OPT_fno_wrapv: + use_fwrapv = false; + break; +case options::OPT_fwrapv_pointer: + use_fwrapv_pointer = true; + break; +case options::OPT_fno_wrapv_pointer: + use_fwrapv_pointer = false; + break; +} } - if (Arg *A = Args.getLastArg(options::OPT_fwrapv_pointer, - options::OPT_fno_wrapv_pointer)) { -if (A->getOption().matches(options::OPT_fwrapv_pointer)) - CmdArgs.push_back("-fwrapv-pointer"); - } else if (!StrictOverflow) { + + if (use_fwrapv) +CmdArgs.push_back("-fwrapv"); + if (use_fwrapv_pointer) CmdArgs.push_back("-fwrapv-pointer"); - } } diff --git a/clang/test/Driver/clang_wrapv_opts.c b/clang/test/Driver/clang_wrapv_opts.c index 9f3a884324dcddd..295d8deb0d99d46 100644 --- a/clang/test/Driver/clang_wrapv_opts.c +++ b/clang/test/Driver/clang_wrapv_opts.c @@ -1,20 +1,20 @@ // RUN: %clang -### -S -fwrapv -fno-wrapv -fwrapv -Werror %s 2>&1 | FileCheck -check-prefix=CHECK1 %s // CHECK1: "-fwrapv" -// + // RUN: %clang -### -S -fwrapv-pointer -fno-wrapv-pointer -fwrapv-pointer -Werror %s 2>&1 | FileCheck -check-prefix=CHECK1-POINTER %s // CHECK1-POINTER: "-fwrapv-pointer" -// + // RUN: %clang -### -S -fstrict-overflow -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK2 %s // CHECK2: "-fwrapv"{{.*}}"-fwrapv-pointer" -// + // RUN: %clang -### -S -fwrapv -fstrict-overflow -Werror -Werror %s 2>&1 | FileCheck -check-prefix=CHECK3 %s --implicit-check-not="-fwrapv-pointer" -// CHECK3: "-fwrapv" -// +// CHECK3-NOT: "-fwrapv" + // RUN: %clang -### -S -fwrapv-pointer -fstrict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK3-POINTER %s --implicit-check-not="-fwrapv" -// CHECK3-POINTER: "-fwrapv-pointer" -// -// RUN: %clang -### -S -fno-wrapv -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4 %s --implicit-check-not="-fwrapv" -// CHECK4: "-fwrapv-pointer" -// -// RUN: %clang -### -S -fno-wrapv-pointer -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4-POINTER %s --implicit-check-not="-fwrapv-pointer" -// CHECK4-POINTER: "-fwrapv" +// CHECK3-POINTER-NOT: "-fwrapv-pointer" + +// RUN: %clang -### -S -fno-wrapv -fno-strict-overflow -fno-wrapv-pointer -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4 %s --implicit-check-not="-fwrapv-pointer" +// CHECK4: "-fwrapv" + +// RUN: %clang -### -S -fno-wrapv-pointer -fno-strict-overflow -fno-wrapv -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4-POINTER %s --implicit-check-not="-fwrapv" +// CHECK4-POINTER: "-fwrapv-pointer" `` https://github.com/llvm/llvm-project/pull/126535 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) (PR #126535)
llvmbot wrote: @nikic What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126535 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR (PR #125836)
https://github.com/krzysz00 approved this pull request. Per further discussion, given the broader context of these changes, this is fine assuming @arsenm's comments about the documentation are addressed. https://github.com/llvm/llvm-project/pull/125836 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
https://github.com/skatrak commented: Thank you Akash for this work. I have a couple of comments, but hopefully not too involved to address. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -7394,24 +7394,26 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( void OpenMPIRBuilder::emitOffloadingArraysAndArgs( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, -TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous, -bool ForEndCall, function_ref DeviceAddrCB, -function_ref CustomMapperCB) { - emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous, - DeviceAddrCB, CustomMapperCB); +TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, +function_ref CustomMapperCB, bool IsNonContiguous, +bool ForEndCall, function_ref DeviceAddrCB) { + emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, CustomMapperCB, + IsNonContiguous, DeviceAddrCB); emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); } static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, + OpenMPIRBuilder::TargetDataInfo &Info, skatrak wrote: This new argument doesn't seem to be used, unless I'm missing something. Was your plan to replace the local variable with the same name in the `EmitTargetCallThen` lambda, perhaps? https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,84 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *declMapperOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + static llvm::DenseMap userDefMapperMap; skatrak wrote: I think we should avoid creating these kinds of global variables. Can't we use `moduleTranslation.lookupFunction()` + `moduleTranslation.mapFunction()` to handle this case? We'd just need to make sure we can easily obtain the function name from the operation, which I guess looking below it's `ompBuilder>createPlatformSpecificName({"omp_mapper", declMapperOp.getSymName()});`. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,84 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *declMapperOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + static llvm::DenseMap userDefMapperMap; + auto iter = userDefMapperMap.find(declMapperOp); + if (iter != userDefMapperMap.end()) +return iter->second; + llvm::Expected mapperFunc = + emitUserDefinedMapper(declMapperOp, builder, moduleTranslation); + if (!mapperFunc) +return mapperFunc.takeError(); + userDefMapperMap.try_emplace(declMapperOp, *mapperFunc); + return mapperFunc; +} + +static llvm::Expected +emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + auto declMapperInfoOp = + *declMapperOp.getOps().begin(); + DataLayout dl = DataLayout(declMapperOp->getParentOfType()); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType()); + std::string mapperName = ompBuilder->createPlatformSpecificName( + {"omp_mapper", declMapperOp.getSymName()}); + SmallVector mapVars = declMapperInfoOp.getMapVars(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // Fill up the arrays with all the mapped variables. + MapInfosTy combinedInfo; + auto genMapInfoCB = + [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI, + llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy { +builder.restoreIP(codeGenIP); +moduleTranslation.mapValue(declMapperOp.getRegion().getArgument(0), ptrPHI); skatrak wrote: Nit: It would probably be good to add to the PR where you're introducing `DeclareMapperOp`, an `extraClassDeclaration` and simplify this like it's done for other similar operations: ```suggestion moduleTranslation.mapValue(declMapperOp.getMoldArg(), ptrPHI); ``` https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,84 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *declMapperOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + static llvm::DenseMap userDefMapperMap; + auto iter = userDefMapperMap.find(declMapperOp); + if (iter != userDefMapperMap.end()) +return iter->second; + llvm::Expected mapperFunc = + emitUserDefinedMapper(declMapperOp, builder, moduleTranslation); + if (!mapperFunc) +return mapperFunc.takeError(); + userDefMapperMap.try_emplace(declMapperOp, *mapperFunc); + return mapperFunc; +} + +static llvm::Expected +emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + auto declMapperInfoOp = + *declMapperOp.getOps().begin(); + DataLayout dl = DataLayout(declMapperOp->getParentOfType()); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType()); + std::string mapperName = ompBuilder->createPlatformSpecificName( + {"omp_mapper", declMapperOp.getSymName()}); + SmallVector mapVars = declMapperInfoOp.getMapVars(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // Fill up the arrays with all the mapped variables. + MapInfosTy combinedInfo; + auto genMapInfoCB = + [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI, + llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy { +builder.restoreIP(codeGenIP); +moduleTranslation.mapValue(declMapperOp.getRegion().getArgument(0), ptrPHI); +moduleTranslation.mapBlock(&declMapperOp.getRegion().front(), + builder.GetInsertBlock()); +if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(), + /*ignoreArguments=*/true, + builder))) + return llvm::make_error(); +MapInfoData mapData; +collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl, + builder); +genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData); + +// Drop the mapping that is no longer necessary so that the same region can +// be processed multiple times. +moduleTranslation.forgetMapping(declMapperOp.getRegion()); +return combinedInfo; + }; + + auto customMapperCB = [&](unsigned i, llvm::Function **mapperFunc) { +if (combinedInfo.Mappers[i]) { + // Call the corresponding mapper function. + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfo.Mappers[i], builder, moduleTranslation); + assert(newFn && "Expect a valid mapper function is available"); skatrak wrote: This callback can fail, so it must return an `llvm::Expected`. This requires updating the types expected by the OMPIRBuilder to `function_ref(unsigned int, Function **)>` to every place these types of callbacks are passed, so I'd suggest giving it a name in OMPIRBuilder.h to simplify things a bit (e.g. `using CustomMapperCallbackTy = function_ref(unsigned int, Function **)>`). That will cause you to also have to check the result and forward errors caused by these callbacks when called inside of the OMPIRBuilder. Currently, the `assert` here will be removed in some builds, triggering another assertion, regardless of whether there was an error, due to not having checked the error status before trying to get the result below. Instead, this should be done: ```c++ if (!newFn) return newFn.takeError(); ``` https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -4438,14 +4546,49 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + llvm::OpenMPIRBuilder::TargetDataInfo info( + /*RequiresDevicePointerInfo=*/false, + /*SeparateBeginEndCalls=*/true); + llvm::Value *ifCond = nullptr; + if (Value targetIfCond = targetOp.getIfExpr()) +ifCond = moduleTranslation.lookupValue(targetIfCond); + + auto customMapperCB = [&](unsigned int i) { +llvm::Value *mapperFunc = nullptr; +if (combinedInfos.Mappers[i]) { + info.HasMapper = true; + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfos.Mappers[i], builder, moduleTranslation); + assert(newFn && "Expect a valid mapper function is available"); + mapperFunc = *newFn; +} +return mapperFunc; + }; + + llvm::OpenMPIRBuilder::TargetDataInfo info( + /*RequiresDevicePointerInfo=*/false, + /*SeparateBeginEndCalls=*/true); + + auto customMapperCB = [&](unsigned int i) { +llvm::Value *mapperFunc = nullptr; +if (combinedInfos.Mappers[i]) { + info.HasMapper = true; + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfos.Mappers[i], builder, moduleTranslation); + assert(newFn && "Expect a valid mapper function is available"); + mapperFunc = *newFn; +} +return mapperFunc; + }; skatrak wrote: Looks like this got copy-pasted twice. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3745,17 +3840,30 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, return builder.saveIP(); }; + auto customMapperCB = [&](unsigned int i) { +llvm::Function *mapperFunc = nullptr; +if (combinedInfo.Mappers[i]) { + info.HasMapper = true; + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfo.Mappers[i], builder, moduleTranslation); + assert(newFn && "Expect a valid mapper function is available"); + mapperFunc = *newFn; +} +return mapperFunc; + }; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() { if (isa(op)) return ompBuilder->createTargetData( ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), - ifCond, info, genMapInfoCB, nullptr, bodyGenCB); -return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(), -builder.getInt64(deviceID), ifCond, -info, genMapInfoCB, &RTLFn); + ifCond, info, genMapInfoCB, customMapperCB, nullptr, bodyGenCB, skatrak wrote: Nit: Document `nullptr` argument, like it's done for `DeviceAddrCB`. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) (PR #126535)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 783275eb7b3ecde63bdb6ac1316c090bfc568bdd Requested by: @nico --- Full diff: https://github.com/llvm/llvm-project/pull/126535.diff 2 Files Affected: - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+33-15) - (modified) clang/test/Driver/clang_wrapv_opts.c (+12-12) ``diff diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 4ed4dbc1a8d1bc9..ae635fb6a18079e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3098,21 +3098,39 @@ bool tools::shouldRecordCommandLine(const ToolChain &TC, void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, ArgStringList &CmdArgs) { - // -fno-strict-overflow implies -fwrapv if it isn't disabled, but - // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. - bool StrictOverflow = Args.hasFlag(options::OPT_fstrict_overflow, - options::OPT_fno_strict_overflow, true); - if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { -if (A->getOption().matches(options::OPT_fwrapv)) - CmdArgs.push_back("-fwrapv"); - } else if (!StrictOverflow) { -CmdArgs.push_back("-fwrapv"); + bool use_fwrapv = false; + bool use_fwrapv_pointer = false; + for (const Arg *A : Args.filtered( + options::OPT_fstrict_overflow, options::OPT_fno_strict_overflow, + options::OPT_fwrapv, options::OPT_fno_wrapv, + options::OPT_fwrapv_pointer, options::OPT_fno_wrapv_pointer)) { +A->claim(); +switch (A->getOption().getID()) { +case options::OPT_fstrict_overflow: + use_fwrapv = false; + use_fwrapv_pointer = false; + break; +case options::OPT_fno_strict_overflow: + use_fwrapv = true; + use_fwrapv_pointer = true; + break; +case options::OPT_fwrapv: + use_fwrapv = true; + break; +case options::OPT_fno_wrapv: + use_fwrapv = false; + break; +case options::OPT_fwrapv_pointer: + use_fwrapv_pointer = true; + break; +case options::OPT_fno_wrapv_pointer: + use_fwrapv_pointer = false; + break; +} } - if (Arg *A = Args.getLastArg(options::OPT_fwrapv_pointer, - options::OPT_fno_wrapv_pointer)) { -if (A->getOption().matches(options::OPT_fwrapv_pointer)) - CmdArgs.push_back("-fwrapv-pointer"); - } else if (!StrictOverflow) { + + if (use_fwrapv) +CmdArgs.push_back("-fwrapv"); + if (use_fwrapv_pointer) CmdArgs.push_back("-fwrapv-pointer"); - } } diff --git a/clang/test/Driver/clang_wrapv_opts.c b/clang/test/Driver/clang_wrapv_opts.c index 9f3a884324dcddd..295d8deb0d99d46 100644 --- a/clang/test/Driver/clang_wrapv_opts.c +++ b/clang/test/Driver/clang_wrapv_opts.c @@ -1,20 +1,20 @@ // RUN: %clang -### -S -fwrapv -fno-wrapv -fwrapv -Werror %s 2>&1 | FileCheck -check-prefix=CHECK1 %s // CHECK1: "-fwrapv" -// + // RUN: %clang -### -S -fwrapv-pointer -fno-wrapv-pointer -fwrapv-pointer -Werror %s 2>&1 | FileCheck -check-prefix=CHECK1-POINTER %s // CHECK1-POINTER: "-fwrapv-pointer" -// + // RUN: %clang -### -S -fstrict-overflow -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK2 %s // CHECK2: "-fwrapv"{{.*}}"-fwrapv-pointer" -// + // RUN: %clang -### -S -fwrapv -fstrict-overflow -Werror -Werror %s 2>&1 | FileCheck -check-prefix=CHECK3 %s --implicit-check-not="-fwrapv-pointer" -// CHECK3: "-fwrapv" -// +// CHECK3-NOT: "-fwrapv" + // RUN: %clang -### -S -fwrapv-pointer -fstrict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK3-POINTER %s --implicit-check-not="-fwrapv" -// CHECK3-POINTER: "-fwrapv-pointer" -// -// RUN: %clang -### -S -fno-wrapv -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4 %s --implicit-check-not="-fwrapv" -// CHECK4: "-fwrapv-pointer" -// -// RUN: %clang -### -S -fno-wrapv-pointer -fno-strict-overflow -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4-POINTER %s --implicit-check-not="-fwrapv-pointer" -// CHECK4-POINTER: "-fwrapv" +// CHECK3-POINTER-NOT: "-fwrapv-pointer" + +// RUN: %clang -### -S -fno-wrapv -fno-strict-overflow -fno-wrapv-pointer -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4 %s --implicit-check-not="-fwrapv-pointer" +// CHECK4: "-fwrapv" + +// RUN: %clang -### -S -fno-wrapv-pointer -fno-strict-overflow -fno-wrapv -Werror %s 2>&1 | FileCheck -check-prefix=CHECK4-POINTER %s --implicit-check-not="-fwrapv" +// CHECK4-POINTER: "-fwrapv-pointer" `` https://github.com/llvm/llvm-project/pull/126535 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [LV] Forget LCSSA phi with new pred before other SCEV invalidation. (#119897) (PR #126542)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport 3706dfef660097f24fb5efbac0d7f14b424492ed Requested by: @fhahn --- Full diff: https://github.com/llvm/llvm-project/pull/126542.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+4-4) - (added) llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll (+118) ``diff diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 06c2a91f89b1c5b..0ceeec48487f692 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2888,10 +2888,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { if (EnableVPlanNativePath) fixNonInductionPHIs(State); - // Forget the original basic block. - PSE.getSE()->forgetLoop(OrigLoop); - PSE.getSE()->forgetBlockAndLoopDispositions(); - // After vectorization, the exit blocks of the original loop will have // additional predecessors. Invalidate SCEVs for the exit phis in case SE // looked through single-entry phis. @@ -2901,6 +2897,10 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { for (PHINode &PN : Exit->phis()) PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); + // Forget the original basic block. + PSE.getSE()->forgetLoop(OrigLoop); + PSE.getSE()->forgetBlockAndLoopDispositions(); + // Don't apply optimizations below when no vector region remains, as they all // require a vector loop at the moment. if (!State.Plan->getVectorLoopRegion()) diff --git a/llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll new file mode 100644 index 000..235a8f0fa34a839 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print,loop-vectorize' -force-vector-width=4 -scalar-evolution-classify-expressions=false -S %s | FileCheck %s + +; Test case for https://github.com/llvm/llvm-project/issues/119665. + +; %loop.2's backedge-taken-count depends on %add.1 from %loop.1 via its +; corresponding SCEV at the scope of %loop.2. After vectorizing %loop.1, %add.1 +; isn't available at the entry of %loop.2 anymore and %add.1 at %loop.2's scope +; must be invalidated, as well as %loop.2's backedge-taken count. +define void @test_invalidate_scevs_at_scope(ptr %p) { +; CHECK-LABEL: define void @test_invalidate_scevs_at_scope( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT:br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT:br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT:[[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:[[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT:[[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT:[[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT:[[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] +; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT:[[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT:[[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT:br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT:[[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +; CHECK-NEXT:br i1 false, label %[[EXIT_1:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT:[[BC_RESUME_VAL:%.*]] = phi i32 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT:br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT:[[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT:[[TMP4:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT:[[ADD_1:%.*]] = add i32 [[TMP4]], [[IV_1]] +; CHECK-NEXT:[[IV_1_NEXT]] = add i32 [[IV_1]], 1 +; CHECK-NEXT:[[C_1:%.*]] = icmp eq i32 [[IV_1]], 100 +; CHECK-NEXT:br i1 [[C_1]], label %[[EXIT_1]], label %[[LOOP_1]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT_1]]: +; CHECK-NEXT:[[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_1]], %[[LOOP_1]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT:[[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD_LCSSA]],
[llvm-branch-commits] [llvm] release/20.x: [LV] Forget LCSSA phi with new pred before other SCEV invalidation. (#119897) (PR #126542)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126542 Backport 3706dfef660097f24fb5efbac0d7f14b424492ed Requested by: @fhahn >From 8bec9a4aaec8de57907ff658c96b5fe7826e2fee Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 10 Feb 2025 16:29:42 + Subject: [PATCH] [LV] Forget LCSSA phi with new pred before other SCEV invalidation. (#119897) `forgetLcssaPhiWithNewPredecessor` performs additional invalidation if there is an existing SCEV for the phi, but earlier `forgetBlockAndLoopDispositions` or `forgetLoop` may already invalidate the SCEV for the phi. Change the order to first call `forgetLcssaPhiWithNewPredecessor` to ensure it runs before its SCEV gets invalidated too eagerly. Fixes https://github.com/llvm/llvm-project/issues/119665. PR: https://github.com/llvm/llvm-project/pull/119897 (cherry picked from commit 3706dfef660097f24fb5efbac0d7f14b424492ed) --- .../Transforms/Vectorize/LoopVectorize.cpp| 8 +- ...idate-scev-at-scope-after-vectorization.ll | 118 ++ 2 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 06c2a91f89b1c5b..0ceeec48487f692 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2888,10 +2888,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { if (EnableVPlanNativePath) fixNonInductionPHIs(State); - // Forget the original basic block. - PSE.getSE()->forgetLoop(OrigLoop); - PSE.getSE()->forgetBlockAndLoopDispositions(); - // After vectorization, the exit blocks of the original loop will have // additional predecessors. Invalidate SCEVs for the exit phis in case SE // looked through single-entry phis. @@ -2901,6 +2897,10 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { for (PHINode &PN : Exit->phis()) PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); + // Forget the original basic block. + PSE.getSE()->forgetLoop(OrigLoop); + PSE.getSE()->forgetBlockAndLoopDispositions(); + // Don't apply optimizations below when no vector region remains, as they all // require a vector loop at the moment. if (!State.Plan->getVectorLoopRegion()) diff --git a/llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll new file mode 100644 index 000..235a8f0fa34a839 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/invalidate-scev-at-scope-after-vectorization.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print,loop-vectorize' -force-vector-width=4 -scalar-evolution-classify-expressions=false -S %s | FileCheck %s + +; Test case for https://github.com/llvm/llvm-project/issues/119665. + +; %loop.2's backedge-taken-count depends on %add.1 from %loop.1 via its +; corresponding SCEV at the scope of %loop.2. After vectorizing %loop.1, %add.1 +; isn't available at the entry of %loop.2 anymore and %add.1 at %loop.2's scope +; must be invalidated, as well as %loop.2's backedge-taken count. +define void @test_invalidate_scevs_at_scope(ptr %p) { +; CHECK-LABEL: define void @test_invalidate_scevs_at_scope( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT:br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT:br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT:[[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:[[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT:[[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT:[[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT:[[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] +; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT:[[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT:[[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT:br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT:[[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +; CHECK-NEXT:br i1 false, label %[[EXIT_1:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT:[[BC_RESUME_VAL:%.*]] = phi i32 [ 100, %[[MIDDLE_B
[llvm-branch-commits] [llvm] release/20.x: [LV] Forget LCSSA phi with new pred before other SCEV invalidation. (#119897) (PR #126542)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126542 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [LV] Forget LCSSA phi with new pred before other SCEV invalidation. (#119897) (PR #126542)
llvmbot wrote: @nikic What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126542 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
https://github.com/AmrDeveloper approved this pull request. https://github.com/llvm/llvm-project/pull/126367 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [ELF] --package-metadata: support %[0-9a-fA-F][0-9a-fA-F] (PR #126549)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126549 Backport 0a470a926481d370251731cb2dd897531756335f Requested by: @MaskRay >From e744e864972a072d65d3eb351674e7069baeb8e5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 10 Feb 2025 09:21:31 -0800 Subject: [PATCH] [ELF] --package-metadata: support %[0-9a-fA-F][0-9a-fA-F] (This application-specific option is probably not appropriate as a linker option (.o file offers more flexibility and decouples JSON verification from linkers). However, the option has gained some traction in Linux distributions, with support in GNU ld, gold, and mold.) GNU ld has supported percent-encoded bytes and extensions like `%[comma]` since November 2024. mold supports just percent-encoded bytes. To prepare for potential adoption by Ubuntu, let's support percent-encoded bytes. Link: https://sourceware.org/bugzilla/show_bug.cgi?id=32003 Link: https://bugs.launchpad.net/ubuntu/+source/dpkg/+bug/2071468 Pull Request: https://github.com/llvm/llvm-project/pull/126396 (cherry picked from commit 0a470a926481d370251731cb2dd897531756335f) --- lld/ELF/Config.h| 2 +- lld/ELF/Driver.cpp | 23 ++- lld/ELF/Options.td | 2 +- lld/docs/ld.lld.1 | 4 lld/test/ELF/package-metadata.s | 20 +++- 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index c2aadb2cef5200f..98e52b52ea46a11 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -407,7 +407,7 @@ struct Config { StringRef thinLTOJobs; unsigned timeTraceGranularity; int32_t splitStackAdjustSize; - StringRef packageMetadata; + SmallVector packageMetadata; // The following config options do not directly correspond to any // particular command line options. diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 9d0c992c1e85164..2d8a5ade2fecee5 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -822,6 +822,26 @@ static ICFLevel getICF(opt::InputArgList &args) { return ICFLevel::All; } +static void parsePackageMetadata(Ctx &ctx, const opt::Arg &arg) { + unsigned c0, c1; + SmallVector decoded; + StringRef s = arg.getValue(); + for (size_t i = 0, e = s.size(); i != e; ++i) { +if (s[i] != '%') { + decoded.push_back(s[i]); +} else if (i + 2 < e && (c1 = hexDigitValue(s[i + 1])) != -1u && + (c0 = hexDigitValue(s[i + 2])) != -1u) { + decoded.push_back(uint8_t(c1 * 16 + c0)); + i += 2; +} else { + ErrAlways(ctx) << arg.getSpelling() << ": invalid % escape at byte " << i + << "; supports only %[0-9a-fA-F][0-9a-fA-F]"; + return; +} + } + ctx.arg.packageMetadata = std::move(decoded); +} + static StripPolicy getStrip(Ctx &ctx, opt::InputArgList &args) { if (args.hasArg(OPT_relocatable)) return StripPolicy::None; @@ -1383,7 +1403,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.optimize = args::getInteger(args, OPT_O, 1); ctx.arg.orphanHandling = getOrphanHandling(ctx, args); ctx.arg.outputFile = args.getLastArgValue(OPT_o); - ctx.arg.packageMetadata = args.getLastArgValue(OPT_package_metadata); + if (auto *arg = args.getLastArg(OPT_package_metadata)) +parsePackageMetadata(ctx, *arg); ctx.arg.pie = args.hasFlag(OPT_pie, OPT_no_pie, false); ctx.arg.printIcfSections = args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c31875305952fb2..d9998176d0dd407 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -565,7 +565,7 @@ def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"">, def visual_studio_diagnostics_format : FF<"vs-diagnostics">, HelpText<"Format diagnostics for Visual Studio compatibility">; -def package_metadata: JJ<"package-metadata=">, HelpText<"Emit package metadata note">; +def package_metadata: JJ<"package-metadata=">, HelpText<"Emit a percent-encoded string to the .note.package section">; // Aliases def: Separate<["-"], "dT">, Alias, HelpText<"Alias for --default-script">; diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index b28c6082f68b091..b5c1816ce6e5fde 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -493,6 +493,10 @@ If .Fl -use-android-relr-tags is specified, use SHT_ANDROID_RELR instead of SHT_RELR. .Pp +.It Fl -package-metadata +Emit a percent-encoded string to the +.Cm .note.package +section. For example, %25 decodes to a single %. .It Fl -pic-veneer Always generate position independent thunks. .It Fl -pie , Fl -pic-executable diff --git a/lld/test/ELF/package-metadata.s b/lld/test/ELF/package-metadata.s index 29df499d7e98d29..a70a8940d7c68bb 100644 --- a/lld/test/ELF/package-metadata.s +++ b/lld/test/ELF/package-metadata.s @@ -1,12 +1,15 @@ # REQUIRES: x86 +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 a
[llvm-branch-commits] [lld] release/20.x: [ELF] --package-metadata: support %[0-9a-fA-F][0-9a-fA-F] (PR #126549)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126549 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [ELF] --package-metadata: support %[0-9a-fA-F][0-9a-fA-F] (PR #126549)
llvmbot wrote: @llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: None (llvmbot) Changes Backport 0a470a926481d370251731cb2dd897531756335f Requested by: @MaskRay --- Full diff: https://github.com/llvm/llvm-project/pull/126549.diff 5 Files Affected: - (modified) lld/ELF/Config.h (+1-1) - (modified) lld/ELF/Driver.cpp (+22-1) - (modified) lld/ELF/Options.td (+1-1) - (modified) lld/docs/ld.lld.1 (+4) - (modified) lld/test/ELF/package-metadata.s (+15-5) ``diff diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index c2aadb2cef5200f..98e52b52ea46a11 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -407,7 +407,7 @@ struct Config { StringRef thinLTOJobs; unsigned timeTraceGranularity; int32_t splitStackAdjustSize; - StringRef packageMetadata; + SmallVector packageMetadata; // The following config options do not directly correspond to any // particular command line options. diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 9d0c992c1e85164..2d8a5ade2fecee5 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -822,6 +822,26 @@ static ICFLevel getICF(opt::InputArgList &args) { return ICFLevel::All; } +static void parsePackageMetadata(Ctx &ctx, const opt::Arg &arg) { + unsigned c0, c1; + SmallVector decoded; + StringRef s = arg.getValue(); + for (size_t i = 0, e = s.size(); i != e; ++i) { +if (s[i] != '%') { + decoded.push_back(s[i]); +} else if (i + 2 < e && (c1 = hexDigitValue(s[i + 1])) != -1u && + (c0 = hexDigitValue(s[i + 2])) != -1u) { + decoded.push_back(uint8_t(c1 * 16 + c0)); + i += 2; +} else { + ErrAlways(ctx) << arg.getSpelling() << ": invalid % escape at byte " << i + << "; supports only %[0-9a-fA-F][0-9a-fA-F]"; + return; +} + } + ctx.arg.packageMetadata = std::move(decoded); +} + static StripPolicy getStrip(Ctx &ctx, opt::InputArgList &args) { if (args.hasArg(OPT_relocatable)) return StripPolicy::None; @@ -1383,7 +1403,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.optimize = args::getInteger(args, OPT_O, 1); ctx.arg.orphanHandling = getOrphanHandling(ctx, args); ctx.arg.outputFile = args.getLastArgValue(OPT_o); - ctx.arg.packageMetadata = args.getLastArgValue(OPT_package_metadata); + if (auto *arg = args.getLastArg(OPT_package_metadata)) +parsePackageMetadata(ctx, *arg); ctx.arg.pie = args.hasFlag(OPT_pie, OPT_no_pie, false); ctx.arg.printIcfSections = args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c31875305952fb2..d9998176d0dd407 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -565,7 +565,7 @@ def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"">, def visual_studio_diagnostics_format : FF<"vs-diagnostics">, HelpText<"Format diagnostics for Visual Studio compatibility">; -def package_metadata: JJ<"package-metadata=">, HelpText<"Emit package metadata note">; +def package_metadata: JJ<"package-metadata=">, HelpText<"Emit a percent-encoded string to the .note.package section">; // Aliases def: Separate<["-"], "dT">, Alias, HelpText<"Alias for --default-script">; diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index b28c6082f68b091..b5c1816ce6e5fde 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -493,6 +493,10 @@ If .Fl -use-android-relr-tags is specified, use SHT_ANDROID_RELR instead of SHT_RELR. .Pp +.It Fl -package-metadata +Emit a percent-encoded string to the +.Cm .note.package +section. For example, %25 decodes to a single %. .It Fl -pic-veneer Always generate position independent thunks. .It Fl -pie , Fl -pic-executable diff --git a/lld/test/ELF/package-metadata.s b/lld/test/ELF/package-metadata.s index 29df499d7e98d29..a70a8940d7c68bb 100644 --- a/lld/test/ELF/package-metadata.s +++ b/lld/test/ELF/package-metadata.s @@ -1,12 +1,15 @@ # REQUIRES: x86 +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o -# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld a.o --package-metadata='{}' +# RUN: llvm-readelf -n a.out | FileCheck %s --check-prefixes=NOTE,FIRST -# RUN: ld.lld %t.o -o %t --package-metadata='{}' -# RUN: llvm-readelf -n %t | FileCheck %s --check-prefixes=NOTE,FIRST +# RUN: ld.lld a.o --package-metadata='{"abc":123}' +# RUN: llvm-readelf -n a.out | FileCheck %s --check-prefixes=NOTE,SECOND -# RUN: ld.lld %t.o -o %t --package-metadata='{"abc":123}' -# RUN: llvm-readelf -n %t | FileCheck %s --check-prefixes=NOTE,SECOND +# RUN: ld.lld a.o --package-metadata='%7b%22abc%22:123%7D' +# RUN: llvm-readelf -n a.out | FileCheck %s --check-prefixes=NOTE,SECOND # NOTE: .note.package # NOTE-NEXT: Owner @@ -14,6 +17,13 @@ # FIRST-NEXT: description data: 7b 7d 00 # SECOND-NEXT: description data: 7b 22 61 62 63 22 3a 31 32 33 7d 00 +# RUN: not ld.lld a.o --package-metadata='%7b%
[llvm-branch-commits] [lld] release/20.x: [ELF] --package-metadata: support %[0-9a-fA-F][0-9a-fA-F] (PR #126549)
llvmbot wrote: @smithp35 What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126549 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [MLIR][OpenMP] Add Lowering support for OpenMP custom mappers in map clause (PR #121001)
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/121001 >From c93eefb9cc4db635a694f16aee6b42525cd05e79 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Mon, 23 Dec 2024 21:13:42 + Subject: [PATCH 1/4] Add flang lowering changes for mapper field in map clause. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 32 + flang/lib/Lower/OpenMP/ClauseProcessor.h| 3 +- flang/test/Lower/OpenMP/Todo/map-mapper.f90 | 16 --- flang/test/Lower/OpenMP/map-mapper.f90 | 23 +++ 4 files changed, 52 insertions(+), 22 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/Todo/map-mapper.f90 create mode 100644 flang/test/Lower/OpenMP/map-mapper.f90 diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index febc6adcf9d6ff4..467a0dcebf2b8a9 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -969,8 +969,10 @@ void ClauseProcessor::processMapObjects( llvm::omp::OpenMPOffloadMappingFlags mapTypeBits, std::map &parentMemberIndices, llvm::SmallVectorImpl &mapVars, -llvm::SmallVectorImpl &mapSyms) const { +llvm::SmallVectorImpl &mapSyms, +std::string mapperIdName) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::FlatSymbolRefAttr mapperId; for (const omp::Object &object : objects) { llvm::SmallVector bounds; @@ -1003,6 +1005,20 @@ void ClauseProcessor::processMapObjects( } } +if (!mapperIdName.empty()) { + if (mapperIdName == "default") { +auto &typeSpec = object.sym()->owner().IsDerivedType() + ? *object.sym()->owner().derivedTypeSpec() + : object.sym()->GetType()->derivedTypeSpec(); +mapperIdName = typeSpec.name().ToString() + ".default"; +mapperIdName = converter.mangleName(mapperIdName, *typeSpec.GetScope()); + } + assert(converter.getMLIRSymbolTable()->lookup(mapperIdName) && + "mapper not found"); + mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(), + mapperIdName); + mapperIdName.clear(); +} // Explicit map captures are captured ByRef by default, // optimisation passes may alter this to ByCopy or other capture // types to optimise @@ -1016,7 +1032,8 @@ void ClauseProcessor::processMapObjects( static_cast< std::underlying_type_t>( mapTypeBits), -mlir::omp::VariableCaptureKind::ByRef, baseOp.getType()); +mlir::omp::VariableCaptureKind::ByRef, baseOp.getType(), false, +mapperId); if (parentObj.has_value()) { parentMemberIndices[parentObj.value()].addChildIndexAndMapToParent( @@ -1047,6 +1064,7 @@ bool ClauseProcessor::processMap( const auto &[mapType, typeMods, mappers, iterator, objects] = clause.t; llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; +std::string mapperIdName; // If the map type is specified, then process it else Tofrom is the // default. Map::MapType type = mapType.value_or(Map::MapType::Tofrom); @@ -1090,13 +1108,17 @@ bool ClauseProcessor::processMap( "Support for iterator modifiers is not implemented yet"); } if (mappers) { - TODO(currentLocation, - "Support for mapper modifiers is not implemented yet"); + assert(mappers->size() == 1 && "more than one mapper"); + mapperIdName = mappers->front().v.id().symbol->name().ToString(); + if (mapperIdName != "default") +mapperIdName = converter.mangleName( +mapperIdName, mappers->front().v.id().symbol->owner()); } processMapObjects(stmtCtx, clauseLocation, std::get(clause.t), mapTypeBits, - parentMemberIndices, result.mapVars, *ptrMapSyms); + parentMemberIndices, result.mapVars, *ptrMapSyms, + mapperIdName); }; bool clauseFound = findRepeatableClause(process); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index e05f66c7666844f..2b319e890a5adbe 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -175,7 +175,8 @@ class ClauseProcessor { llvm::omp::OpenMPOffloadMappingFlags mapTypeBits, std::map &parentMemberIndices, llvm::SmallVectorImpl &mapVars, - llvm::SmallVectorImpl &mapSyms) const; + llvm::SmallVectorImpl &mapSyms, + std::string mapperIdName = "") const; lower::AbstractConverter &converter; semantics::SemanticsContext &semaCtx; diff --git a/flang/test/Lower/OpenMP/Todo/map-mapper.f90 b/flang/test/Lower/OpenMP/Todo/map-mapper.f90 deleted file mode 100644 index 9554ffd5fda7bdd..000 --- a/fla
[llvm-branch-commits] [llvm] Add release note for Armv9.6 updates (PR #126513)
@@ -151,6 +151,9 @@ Changes to the AArch64 Backend * Added support for the FUJITSU-MONAKA CPU. +* Updated feature dependency in Armv9.6 for FEAT_FAMINMAX, FEAT_LUT and mgabka wrote: Could you rewrite it a bit to: "Updated features dependencies in Armv9.6 definition for FEAT_FAMINMAX, FEAT_LUT and FEAT_FP8, now they depend only on FEAT_NEON." https://github.com/llvm/llvm-project/pull/126513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,84 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *declMapperOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + static llvm::DenseMap userDefMapperMap; + auto iter = userDefMapperMap.find(declMapperOp); + if (iter != userDefMapperMap.end()) +return iter->second; + llvm::Expected mapperFunc = + emitUserDefinedMapper(declMapperOp, builder, moduleTranslation); + if (!mapperFunc) +return mapperFunc.takeError(); + userDefMapperMap.try_emplace(declMapperOp, *mapperFunc); + return mapperFunc; +} + +static llvm::Expected +emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + auto declMapperInfoOp = + *declMapperOp.getOps().begin(); skatrak wrote: Nit: Related to another comment I left for this PR stack: ```suggestion auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo(); ``` https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Add OMP Mapper field to MapInfoOp (PR #120994)
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/120994 >From f8642ec5b59e8617d2f1b8a87938ce2d6ba25205 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Mon, 23 Dec 2024 20:53:47 + Subject: [PATCH 1/2] Add mapper field to mapInfoOp. --- flang/lib/Lower/OpenMP/Utils.cpp| 3 ++- flang/lib/Lower/OpenMP/Utils.h | 3 ++- flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp | 5 - flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp | 1 + mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 2 ++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp| 2 +- mlir/test/Dialect/OpenMP/ops.mlir | 4 ++-- 7 files changed, 14 insertions(+), 6 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 35722fa7d1b1206..fa1975dac789b12 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -125,7 +125,7 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, llvm::ArrayRef members, mlir::ArrayAttr membersIndex, uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, -bool partialMap) { +bool partialMap, mlir::FlatSymbolRefAttr mapperId) { if (auto boxTy = llvm::dyn_cast(baseAddr.getType())) { baseAddr = builder.create(loc, baseAddr); retTy = baseAddr.getType(); @@ -144,6 +144,7 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, mlir::omp::MapInfoOp op = builder.create( loc, retTy, baseAddr, varType, varPtrPtr, members, membersIndex, bounds, builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), + mapperId, builder.getAttr(mapCaptureType), builder.getStringAttr(name), builder.getBoolAttr(partialMap)); return op; diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index f2e378443e5f295..3943eb633b04e36 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -116,7 +116,8 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, llvm::ArrayRef members, mlir::ArrayAttr membersIndex, uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, -bool partialMap = false); +bool partialMap = false, +mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr()); void insertChildMapInfoIntoParent( Fortran::lower::AbstractConverter &converter, diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index e7c1d1d9d560f87..beea7543e54b324 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -184,6 +184,7 @@ class MapInfoFinalizationPass /*members=*/mlir::SmallVector{}, /*membersIndex=*/mlir::ArrayAttr{}, bounds, builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), +/*mapperId*/ mlir::FlatSymbolRefAttr(), builder.getAttr( mlir::omp::VariableCaptureKind::ByRef), /*name=*/builder.getStringAttr(""), @@ -329,7 +330,8 @@ class MapInfoFinalizationPass builder.getIntegerAttr( builder.getIntegerType(64, false), getDescriptorMapType(op.getMapType().value_or(0), target)), -op.getMapCaptureTypeAttr(), op.getNameAttr(), +/*mapperId*/ mlir::FlatSymbolRefAttr(), op.getMapCaptureTypeAttr(), +op.getNameAttr(), /*partial_map=*/builder.getBoolAttr(false)); op.replaceAllUsesWith(newDescParentMapOp.getResult()); op->erase(); @@ -623,6 +625,7 @@ class MapInfoFinalizationPass /*members=*/mlir::ValueRange{}, /*members_index=*/mlir::ArrayAttr{}, /*bounds=*/bounds, op.getMapTypeAttr(), + /*mapperId*/ mlir::FlatSymbolRefAttr(), builder.getAttr( mlir::omp::VariableCaptureKind::ByRef), builder.getStringAttr(op.getNameAttr().strref() + "." + diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 963ae863c1fc5cb..97ea463a3c495df 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -91,6 +91,7 @@ class MapsForPrivatizedSymbolsPass /*bounds=*/ValueRange{}, builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false), mapTypeTo), +/*mapperId*/ mlir::FlatSymbolRefAttr(), builder.getAttr( omp::VariableCaptureKind::ByRef), StringAttr(), builder.getBoolAttr(false)); diff --git a/ml
[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Add conversion support from FIR to LLVM Dialect for OMP DeclareMapper (PR #121005)
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/121005 >From 879d8a1765b9176fce18c02bdcc29bb079b6ba7e Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Mon, 23 Dec 2024 21:50:03 + Subject: [PATCH 1/2] Add OpenMP to LLVM dialect conversion support for DeclareMapperOp. --- .../Fir/convert-to-llvm-openmp-and-fir.fir| 27 +-- .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 48 +++ .../OpenMPToLLVM/convert-to-llvmir.mlir | 13 + 3 files changed, 74 insertions(+), 14 deletions(-) diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 8e4e1fe824d9f5b..82f2aea3ad983c9 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -936,9 +936,9 @@ func.func @omp_map_info_descriptor_type_conversion(%arg0 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr> {name = ""} // CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, delete) capture(ByRef) members(%[[MEMBER_MAP]] : [0] : !llvm.ptr) -> !llvm.ptr {name = ""} %2 = omp.map.info var_ptr(%arg0 : !fir.ref>>, !fir.box>) map_clauses(always, delete) capture(ByRef) members(%1 : [0] : !fir.llvm_ptr>) -> !fir.ref>> {name = ""} - // CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]] : !llvm.ptr) + // CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]] : !llvm.ptr) omp.target_exit_data map_entries(%2 : !fir.ref>>) - return + return } // - @@ -956,8 +956,8 @@ func.func @omp_map_info_derived_type_explicit_member_conversion(%arg0 : !fir.ref %3 = fir.field_index real, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}> %4 = fir.coordinate_of %arg0, %3 : (!fir.ref,int:i32}>>, !fir.field) -> !fir.ref // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%[[GEP_2]] : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "dtype%real"} - %5 = omp.map.info var_ptr(%4 : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%real"} - // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [2], [0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "dtype", partial_map = true} + %5 = omp.map.info var_ptr(%4 : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%real"} + // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [2], [0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "dtype", partial_map = true} %6 = omp.map.info var_ptr(%arg0 : !fir.ref,int:i32}>>, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(tofrom) capture(ByRef) members(%2, %5 : [2], [0] : !fir.ref, !fir.ref) -> !fir.ref,int:i32}>> {name = "dtype", partial_map = true} // CHECK: omp.target map_entries(%[[MAP_MEMBER_1]] -> %[[ARG_1:.*]], %[[MAP_MEMBER_2]] -> %[[ARG_2:.*]], %[[MAP_PARENT]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.target map_entries(%2 -> %arg1, %5 -> %arg2, %6 -> %arg3 : !fir.ref, !fir.ref, !fir.ref,int:i32}>>) { @@ -1275,3 +1275,22 @@ func.func @map_nested_dtype_alloca_mem2(%arg0 : !fir.ref { +omp.declare_mapper @my_mapper : !fir.type<_QFdeclare_mapperTmy_type{data:i32}> { +// CHECK: ^bb0(%[[VAL_0:.*]]: !llvm.ptr): +^bb0(%0: !fir.ref>): +// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(0 : i32) : i32 + %1 = fir.field_index data, !fir.type<_QFdeclare_mapperTmy_type{data:i32}> +// CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)> + %2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%[[VAL_4:.*]]"} + %3 = omp.map.info var_ptr(%2 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "var%data"} +// CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !llvm.ptr, !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%[[VAL_3]] : [0] : !llvm.ptr) -> !llvm.ptr {name = "var", partial_map = true} + %4 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.type<_QFdeclare_mapperTmy_type{data:i32}>) map_clauses(tofrom) capture(ByRef) members(%3 : [0] : !fir.ref) -> !fir.ref> {name = "var", partial_map = true} +// CHECK: omp.declare_mapper_info map_entries(%[[VAL_5]], %[[VAL_3]] : !llvm.ptr, !llvm.ptr) + omp.declare_m
[llvm-branch-commits] [libcxx] [libc++] Make benchmarks dry-run by default on the release branch (PR #126441)
https://github.com/mordante approved this pull request. Thanks, LGTM! https://github.com/llvm/llvm-project/pull/126441 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Add OMP Mapper field to MapInfoOp (PR #120994)
skatrak wrote: > Is that necessary given I have the entire implementation in place? I am > planning on merging the entire series in one go. If you plan on merging the whole stack at once and if it will add support for declare mappers to all operations that take map clauses, then I agree it would be fine to skip adding any not-yet-implemented errors here. https://github.com/llvm/llvm-project/pull/120994 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [MLIR][OpenMP] Add Lowering support for OpenMP custom mappers in map clause (PR #121001)
@@ -936,8 +936,10 @@ void ClauseProcessor::processMapObjects( llvm::omp::OpenMPOffloadMappingFlags mapTypeBits, std::map &parentMemberIndices, llvm::SmallVectorImpl &mapVars, -llvm::SmallVectorImpl &mapSyms) const { +llvm::SmallVectorImpl &mapSyms, +std::string mapperIdName) const { skatrak wrote: Accepting a `StringRef` gives us more flexibility while passing arguments (plus, it gives us the option to rework the implementation to avoid copies later on), it follows [LLVM recommendations](https://llvm.org/docs/ProgrammersManual.html#passing-strings-the-stringref-and-twine-classes) and I don't see any drawbacks from using it here. In my opinion, I still think it's a better option. If you feel strongly about it, this is not a blocking issue for me. https://github.com/llvm/llvm-project/pull/121001 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
https://github.com/sdesmalen-arm approved this pull request. There is no risk in adding this to the release branch, because all functionality is hidden behind a flag. The TableGen/SubtargetEmitter.cpp change should not affect anything, because it merely emits an extra `enum class` to the `*GenSubtargetInfo.cpp` file. https://github.com/llvm/llvm-project/pull/126503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [MLIR][OpenMP] Add Lowering support for OpenMP Declare Mapper directive (PR #117046)
@@ -2612,7 +2612,52 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) { - TODO(converter.getCurrentLocation(), "OpenMPDeclareMapperConstruct"); + mlir::Location loc = converter.genLocation(declareMapperConstruct.source); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + lower::StatementContext stmtCtx; + const auto &spec = + std::get(declareMapperConstruct.t); + const auto &mapperName{std::get>(spec.t)}; + const auto &varType{std::get(spec.t)}; + const auto &varName{std::get(spec.t)}; + assert(varType.declTypeSpec->category() == + semantics::DeclTypeSpec::Category::TypeDerived && + "Expected derived type"); + + std::string mapperNameStr; + if (mapperName.has_value()) { +mapperNameStr = mapperName->ToString(); +mapperNameStr = +converter.mangleName(mapperNameStr, mapperName->symbol->owner()); + } else { +mapperNameStr = +varType.declTypeSpec->derivedTypeSpec().name().ToString() + ".default"; +mapperNameStr = converter.mangleName( +mapperNameStr, *varType.declTypeSpec->derivedTypeSpec().GetScope()); + } + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); skatrak wrote: Can you point me to the exact test you're referring to? I took a look at that PR and couldn't find any that checked this. https://github.com/llvm/llvm-project/pull/117046 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: None (llvmbot) Changes Backport 71adb054024a1e9bd5ed4566beda74dea65362cd Requested by: @nico --- Full diff: https://github.com/llvm/llvm-project/pull/126518.diff 2 Files Affected: - (modified) clang/include/clang/Driver/Options.td (+3-3) - (modified) clang/test/Driver/cl-options.c (+4) ``diff diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a2b47b943ef90dd..02e5c4cbb4bff90 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3505,8 +3505,6 @@ def fno_strict_aliasing : Flag<["-"], "fno-strict-aliasing">, Group, def fstruct_path_tbaa : Flag<["-"], "fstruct-path-tbaa">, Group; def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; -def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero", PosFlag, @@ -4023,7 +4021,9 @@ defm strict_vtable_pointers : BoolFOption<"strict-vtable-pointers", " overwriting polymorphic C++ objects">, NegFlag>; def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; + Visibility<[ClangOption, CLOption, FlangOption]>; +def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, + Visibility<[ClangOption, CLOption, FlangOption]>; def fpointer_tbaa : Flag<["-"], "fpointer-tbaa">, Group; def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>, diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 29a0fcbc17ac603..9f9ca1bf1a8fdc4 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -738,9 +738,13 @@ // RUN: -fno-modules-search-all \ // RUN: -fimplicit-modules \ // RUN: -fno-implicit-modules \ +// RUN: -fstrict-overflow \ +// RUN: -fno-strict-overflow \ // RUN: -ftrivial-auto-var-init=zero \ // RUN: -fwrapv \ // RUN: -fno-wrapv \ +// RUN: -fwrapv-pointer \ +// RUN: -fno-wrapv-pointer \ // RUN: --version \ // RUN: -Werror /Zs -- %s 2>&1 `` https://github.com/llvm/llvm-project/pull/126518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126518 Backport 71adb054024a1e9bd5ed4566beda74dea65362cd Requested by: @nico >From 0ac833ec8514898444c6b510a2ac58ed129d03b9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 10 Feb 2025 09:00:31 -0500 Subject: [PATCH] [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) Also move the -fno-strict-overflow option definition next to the -fstrict-overflow one while here. Also add test coverage for f(no-)wrapv-pointer being a clang-cl option. (cherry picked from commit 71adb054024a1e9bd5ed4566beda74dea65362cd) --- clang/include/clang/Driver/Options.td | 6 +++--- clang/test/Driver/cl-options.c| 4 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a2b47b943ef90dd..02e5c4cbb4bff90 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3505,8 +3505,6 @@ def fno_strict_aliasing : Flag<["-"], "fno-strict-aliasing">, Group, def fstruct_path_tbaa : Flag<["-"], "fstruct-path-tbaa">, Group; def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; -def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero", PosFlag, @@ -4023,7 +4021,9 @@ defm strict_vtable_pointers : BoolFOption<"strict-vtable-pointers", " overwriting polymorphic C++ objects">, NegFlag>; def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; + Visibility<[ClangOption, CLOption, FlangOption]>; +def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, + Visibility<[ClangOption, CLOption, FlangOption]>; def fpointer_tbaa : Flag<["-"], "fpointer-tbaa">, Group; def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>, diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 29a0fcbc17ac603..9f9ca1bf1a8fdc4 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -738,9 +738,13 @@ // RUN: -fno-modules-search-all \ // RUN: -fimplicit-modules \ // RUN: -fno-implicit-modules \ +// RUN: -fstrict-overflow \ +// RUN: -fno-strict-overflow \ // RUN: -ftrivial-auto-var-init=zero \ // RUN: -fwrapv \ // RUN: -fno-wrapv \ +// RUN: -fwrapv-pointer \ +// RUN: -fno-wrapv-pointer \ // RUN: --version \ // RUN: -Werror /Zs -- %s 2>&1 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
llvmbot wrote: @zmodem What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/126518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/126518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) Changes This cherry-picks commits: 82c6b8f7bbebc32751170267bbb7712f028cf587 and e470dcae8d2c4138a89974ceeb413b1568d3a112. These are needed for the `-aarch64-enable-zpr-predicate-spills` flag. This is an off-by-default flag that converts predicate spills to data vector spills in streaming[-compatible] functions. We think this should be fairly low risk as this feature needs to be manually enabled, but we'd like this to be for users to experiment with in LLVM 20. --- Patch is 82.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126503.diff 11 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+325-5) - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+15-1) - (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+2-2) - (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.h (+1-1) - (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.td (+10-1) - (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+19) - (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+2) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+14) - (added) llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir (+1013) - (modified) llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll (+12-1) - (modified) llvm/utils/TableGen/SubtargetEmitter.cpp (+21-1) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index a082a1ebe95bf84..d3abd79b85a75f7 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1634,6 +1634,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { case AArch64::STR_PXI: case AArch64::LDR_ZXI: case AArch64::LDR_PXI: + case AArch64::PTRUE_B: + case AArch64::CPY_ZPzI_B: + case AArch64::CMPNE_PPzZI_B: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); } @@ -3265,7 +3268,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI; break; case RegPairInfo::PPR: - StrOpc = AArch64::STR_PXI; + StrOpc = + Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI; break; case RegPairInfo::VG: StrOpc = AArch64::STRXui; @@ -3494,7 +3498,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI; break; case RegPairInfo::PPR: - LdrOpc = AArch64::LDR_PXI; + LdrOpc = Size == 16 ? AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO + : AArch64::LDR_PXI; break; case RegPairInfo::VG: continue; @@ -3720,6 +3725,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, continue; } +// Always save P4 when PPR spills are ZPR-sized and a predicate above p8 is +// spilled. If all of p0-p3 are used as return values p4 is must be free +// to reload p8-p15. +if (RegInfo->getSpillSize(AArch64::PPRRegClass) == 16 && +AArch64::PPR_p8to15RegClass.contains(Reg)) { + SavedRegs.set(AArch64::P4); +} + // MachO's compact unwind format relies on all registers being stored in // pairs. // FIXME: the usual format is actually better if unwinding isn't needed. @@ -4159,8 +4172,312 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets( true); } +/// Attempts to scavenge a register from \p ScavengeableRegs given the used +/// registers in \p UsedRegs. +static Register tryScavengeRegister(LiveRegUnits const &UsedRegs, +BitVector const &ScavengeableRegs, +Register PreferredReg) { + if (PreferredReg != AArch64::NoRegister && UsedRegs.available(PreferredReg)) +return PreferredReg; + for (auto Reg : ScavengeableRegs.set_bits()) { +if (UsedRegs.available(Reg)) + return Reg; + } + return AArch64::NoRegister; +} + +/// Propagates frame-setup/destroy flags from \p SourceMI to all instructions in +/// \p MachineInstrs. +static void propagateFrameFlags(MachineInstr &SourceMI, +ArrayRef MachineInstrs) { + for (MachineInstr *MI : MachineInstrs) { +if (SourceMI.getFlag(MachineInstr::FrameSetup)) + MI->setFlag(MachineInstr::FrameSetup); +if (SourceMI.getFlag(MachineInstr::FrameDestroy)) + MI->setFlag(MachineInstr::FrameDestroy); + } +} + +/// RAII helper class for scavenging or spilling a register. On construction +/// attempts to find a free register of class \p RC (given \p UsedRegs and \p +/// AllocatableRegs), if no register can be found spills \p SpillCandidate to \p +/// MaybeSpillFI to free a register. The
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
https://github.com/MacDue milestoned https://github.com/llvm/llvm-project/pull/126503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/126503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Add release note for Armv9.6 updates (PR #126513)
https://github.com/CarolineConcatto created https://github.com/llvm/llvm-project/pull/126513 None >From 5b132030f237beaed44aa87916bbdc98aa65d58d Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Mon, 10 Feb 2025 13:14:13 + Subject: [PATCH] Add release note for Armv9.6 updates --- llvm/docs/ReleaseNotes.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 44a0b17d6a07b97..23757f937ec6f85 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -151,6 +151,9 @@ Changes to the AArch64 Backend * Added support for the FUJITSU-MONAKA CPU. +* Updated feature dependency in Armv9.6 for FEAT_FAMINMAX, FEAT_LUT and + FEAT_FP8. They depend on FEAT_NEON only + Changes to the AMDGPU Backend - ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Add release note for Armv9.6 updates (PR #126513)
https://github.com/CarolineConcatto milestoned https://github.com/llvm/llvm-project/pull/126513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Handle f(no-)strict-overflow, f(no-)wrapv, f(no-)wrapv-pointer like gcc (#126524) (PR #126535)
https://github.com/carlocab approved this pull request. https://github.com/llvm/llvm-project/pull/126535 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/126367 >From d5f3620eede4470b9387f7475dfd53ad7bdf7e9b Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Sat, 8 Feb 2025 14:14:16 +0100 Subject: [PATCH 1/2] [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) Fix printing the correct file path in the error message when the output file specified by `--dump-section` cannot be opened Fixes: #125113 on ELF, MachO, Wasm (cherry picked from commit 66bea0df75ccdd5ffed41d06c7301a116d11abcb) --- llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 59 ++- llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp | 27 + llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp | 15 ++--- .../tools/llvm-objcopy/ELF/dump-section.test | 4 ++ .../llvm-objcopy/MachO/dump-section.test | 4 ++ .../tools/llvm-objcopy/wasm/dump-section.test | 4 ++ 6 files changed, 64 insertions(+), 49 deletions(-) diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index 5aa0079f3fbc7a7..9c78f7433ad3390 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -186,27 +186,28 @@ static std::unique_ptr createWriter(const CommonConfig &Config, } static Error dumpSectionToFile(StringRef SecName, StringRef Filename, - Object &Obj) { + StringRef InputFilename, Object &Obj) { for (auto &Sec : Obj.sections()) { if (Sec.Name == SecName) { if (Sec.Type == SHT_NOBITS) -return createStringError(object_error::parse_failed, - "cannot dump section '%s': it has no contents", - SecName.str().c_str()); +return createFileError(InputFilename, object_error::parse_failed, + "cannot dump section '%s': it has no contents", + SecName.str().c_str()); Expected> BufferOrErr = FileOutputBuffer::create(Filename, Sec.OriginalData.size()); if (!BufferOrErr) -return BufferOrErr.takeError(); +return createFileError(Filename, BufferOrErr.takeError()); std::unique_ptr Buf = std::move(*BufferOrErr); std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf->getBufferStart()); if (Error E = Buf->commit()) -return E; +return createFileError(Filename, std::move(E)); return Error::success(); } } - return createStringError(object_error::parse_failed, "section '%s' not found", - SecName.str().c_str()); + + return createFileError(InputFilename, object_error::parse_failed, + "section '%s' not found", SecName.str().c_str()); } Error Object::compressOrDecompressSections(const CommonConfig &Config) { @@ -798,7 +799,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, StringRef SectionName; StringRef FileName; std::tie(SectionName, FileName) = Flag.split('='); -if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) +if (Error E = +dumpSectionToFile(SectionName, FileName, Config.InputFilename, Obj)) return E; } @@ -807,10 +809,10 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, // us to avoid reporting the inappropriate errors about removing symbols // named in relocations. if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj)) -return E; +return createFileError(Config.InputFilename, std::move(E)); if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj)) -return E; +return createFileError(Config.InputFilename, std::move(E)); if (!Config.SetSectionAlignment.empty()) { for (SectionBase &Sec : Obj.sections()) { @@ -826,8 +828,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, if (Config.ChangeSectionLMAValAll > 0 && Seg.PAddr > std::numeric_limits::max() - Config.ChangeSectionLMAValAll) { - return createStringError( - errc::invalid_argument, + return createFileError( + Config.InputFilename, errc::invalid_argument, "address 0x" + Twine::utohexstr(Seg.PAddr) + " cannot be increased by 0x" + Twine::utohexstr(Config.ChangeSectionLMAValAll) + @@ -835,8 +837,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, } else if (Config.ChangeSectionLMAValAll < 0 && Seg.PAddr < std::numeric_limits::min() - Config.ChangeSectionLMAValAll) { - return createStringError( - errc::invalid_argument, + return createFileError( + Config.InputFilename, errc::invalid_argument, "a
[llvm-branch-commits] [ELF] Add support for CREL locations for SHT_LLVM_BB_ADDR_MAP (PR #126446)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/126446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [ELF] --package-metadata: support %[0-9a-fA-F][0-9a-fA-F] (PR #126549)
https://github.com/smithp35 approved this pull request. LGTM. This would be useful as it would be needed if Linux Distros start using this. https://github.com/llvm/llvm-project/pull/126549 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [ELF] Add support for CREL locations for SHT_LLVM_BB_ADDR_MAP (PR #126446)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/126446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 38333d5 - [ARM] Empty structs are 1-byte for C++ ABI (#124762)
Author: Oliver Stannard Date: 2025-02-10T13:30:35-08:00 New Revision: 38333d5e4258edaaaf449b720391d8bf2888e096 URL: https://github.com/llvm/llvm-project/commit/38333d5e4258edaaaf449b720391d8bf2888e096 DIFF: https://github.com/llvm/llvm-project/commit/38333d5e4258edaaaf449b720391d8bf2888e096.diff LOG: [ARM] Empty structs are 1-byte for C++ ABI (#124762) For C++ (but not C), empty structs should be passed to functions as if they are a 1 byte object with 1 byte alignment. This is defined in Arm's CPPABI32: https://github.com/ARM-software/abi-aa/blob/main/cppabi32/cppabi32.rst For the purposes of parameter passing in AAPCS32, a parameter whose type is an empty class shall be treated as if its type were an aggregate with a single member of type unsigned byte. The AArch64 equivalent of this has an exception for structs containing an array of size zero, I've kept that logic for ARM. I've not found a reason for this exception, but I've checked that GCC does have the same behaviour for ARM as it does for AArch64. The AArch64 version has an Apple ABI with different rules, which ignores empty structs in both C and C++. This is documented at https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms. The ARM equivalent of that appears to be AAPCS16_VFP, used for WatchOS, but I can't find any documentation for that ABI, so I'm not sure what rules it should follow. For now I've left it following the AArch64 Apple rules. Added: clang/test/CodeGen/arm-empty-args.cpp Modified: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/LangOptions.h clang/lib/CodeGen/Targets/ARM.cpp Removed: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3281ac0c4dbe28c..b61563ade0a1e1d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1199,6 +1199,11 @@ Arm and AArch64 Support - Runtime detection of depended-on Function Multi Versioning features has been added in accordance with the Arm C Language Extensions (ACLE). +- The ARM calling convention for empty structs in C++ mode was changed to pass + them as if they have a size of 1 byte, matching the AAPCS32 specification and + GCC's implementation. The previous behaviour of ignoring the argument can be + restored using the -fclang-abi-compat=19 (or earlier) option. + Android Support ^^^ diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 114a5d34a008bd7..16c35bcf49339c6 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -246,6 +246,8 @@ class LangOptionsBase { /// construction vtable because it hasn't added 'type' as a substitution. /// - Skip mangling enclosing class templates of member-like friend /// function templates. +/// - Ignore empty struct arguments in C++ mode for ARM, instead of +/// passing them as if they had a size of 1 byte. Ver19, /// Conform to the underlying platform's C and C++ ABIs as closely diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index 2d858fa2f3c3a35..47e31ceeaf29431 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -71,6 +71,7 @@ class ARMABIInfo : public ABIInfo { unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; + bool shouldIgnoreEmptyArg(QualType Ty) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool containsAnyFP16Vectors(QualType Ty) const; @@ -328,6 +329,31 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align); } +bool ARMABIInfo::shouldIgnoreEmptyArg(QualType Ty) const { + uint64_t Size = getContext().getTypeSize(Ty); + assert((isEmptyRecord(getContext(), Ty, true) || Size == 0) && + "Arg is not empty"); + + // Empty records are ignored in C mode, and in C++ on WatchOS. + if (!getContext().getLangOpts().CPlusPlus || + getABIKind() == ARMABIKind::AAPCS16_VFP) +return true; + + // In C++ mode, arguments which have sizeof() == 0 are ignored. This is not a + // situation which is defined by any C++ standard or ABI, but this matches + // GCC's de facto ABI. + if (Size == 0) +return true; + + // Clang 19.0 and earlier always ignored empty struct arguments in C++ mode. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver19) +return true; + + // Otherwise, they are passed as if they have a size of 1 byte. + return false; +} + ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
[llvm-branch-commits] [clang] release/20.x: [ARM] Empty structs are 1-byte for C++ ABI (#124762) (PR #125194)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/125194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [ARM] Empty structs are 1-byte for C++ ABI (#124762) (PR #125194)
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/125194 >From 38333d5e4258edaaaf449b720391d8bf2888e096 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 31 Jan 2025 09:03:01 + Subject: [PATCH] [ARM] Empty structs are 1-byte for C++ ABI (#124762) For C++ (but not C), empty structs should be passed to functions as if they are a 1 byte object with 1 byte alignment. This is defined in Arm's CPPABI32: https://github.com/ARM-software/abi-aa/blob/main/cppabi32/cppabi32.rst For the purposes of parameter passing in AAPCS32, a parameter whose type is an empty class shall be treated as if its type were an aggregate with a single member of type unsigned byte. The AArch64 equivalent of this has an exception for structs containing an array of size zero, I've kept that logic for ARM. I've not found a reason for this exception, but I've checked that GCC does have the same behaviour for ARM as it does for AArch64. The AArch64 version has an Apple ABI with different rules, which ignores empty structs in both C and C++. This is documented at https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms. The ARM equivalent of that appears to be AAPCS16_VFP, used for WatchOS, but I can't find any documentation for that ABI, so I'm not sure what rules it should follow. For now I've left it following the AArch64 Apple rules. --- clang/docs/ReleaseNotes.rst | 5 + clang/include/clang/Basic/LangOptions.h | 2 + clang/lib/CodeGen/Targets/ARM.cpp | 45 +++- clang/test/CodeGen/arm-empty-args.cpp | 131 4 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/arm-empty-args.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3281ac0c4dbe28c..b61563ade0a1e1d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1199,6 +1199,11 @@ Arm and AArch64 Support - Runtime detection of depended-on Function Multi Versioning features has been added in accordance with the Arm C Language Extensions (ACLE). +- The ARM calling convention for empty structs in C++ mode was changed to pass + them as if they have a size of 1 byte, matching the AAPCS32 specification and + GCC's implementation. The previous behaviour of ignoring the argument can be + restored using the -fclang-abi-compat=19 (or earlier) option. + Android Support ^^^ diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 114a5d34a008bd7..16c35bcf49339c6 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -246,6 +246,8 @@ class LangOptionsBase { /// construction vtable because it hasn't added 'type' as a substitution. /// - Skip mangling enclosing class templates of member-like friend /// function templates. +/// - Ignore empty struct arguments in C++ mode for ARM, instead of +/// passing them as if they had a size of 1 byte. Ver19, /// Conform to the underlying platform's C and C++ ABIs as closely diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index 2d858fa2f3c3a35..47e31ceeaf29431 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -71,6 +71,7 @@ class ARMABIInfo : public ABIInfo { unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; + bool shouldIgnoreEmptyArg(QualType Ty) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool containsAnyFP16Vectors(QualType Ty) const; @@ -328,6 +329,31 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align); } +bool ARMABIInfo::shouldIgnoreEmptyArg(QualType Ty) const { + uint64_t Size = getContext().getTypeSize(Ty); + assert((isEmptyRecord(getContext(), Ty, true) || Size == 0) && + "Arg is not empty"); + + // Empty records are ignored in C mode, and in C++ on WatchOS. + if (!getContext().getLangOpts().CPlusPlus || + getABIKind() == ARMABIKind::AAPCS16_VFP) +return true; + + // In C++ mode, arguments which have sizeof() == 0 are ignored. This is not a + // situation which is defined by any C++ standard or ABI, but this matches + // GCC's de facto ABI. + if (Size == 0) +return true; + + // Clang 19.0 and earlier always ignored empty struct arguments in C++ mode. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver19) +return true; + + // Otherwise, they are passed as if they have a size of 1 byte. + return false; +} + ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
[llvm-branch-commits] [clang] release/20.x: [ARM] Empty structs are 1-byte for C++ ABI (#124762) (PR #125194)
github-actions[bot] wrote: @ostannard (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/125194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/126367 >From 5f2d66a9d22537332a9630708b0a7602848babc6 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Sat, 8 Feb 2025 14:14:16 +0100 Subject: [PATCH 1/2] [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) Fix printing the correct file path in the error message when the output file specified by `--dump-section` cannot be opened Fixes: #125113 on ELF, MachO, Wasm (cherry picked from commit 66bea0df75ccdd5ffed41d06c7301a116d11abcb) --- llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 59 ++- llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp | 27 + llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp | 15 ++--- .../tools/llvm-objcopy/ELF/dump-section.test | 4 ++ .../llvm-objcopy/MachO/dump-section.test | 4 ++ .../tools/llvm-objcopy/wasm/dump-section.test | 4 ++ 6 files changed, 64 insertions(+), 49 deletions(-) diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index 5aa0079f3fbc7a7..9c78f7433ad3390 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -186,27 +186,28 @@ static std::unique_ptr createWriter(const CommonConfig &Config, } static Error dumpSectionToFile(StringRef SecName, StringRef Filename, - Object &Obj) { + StringRef InputFilename, Object &Obj) { for (auto &Sec : Obj.sections()) { if (Sec.Name == SecName) { if (Sec.Type == SHT_NOBITS) -return createStringError(object_error::parse_failed, - "cannot dump section '%s': it has no contents", - SecName.str().c_str()); +return createFileError(InputFilename, object_error::parse_failed, + "cannot dump section '%s': it has no contents", + SecName.str().c_str()); Expected> BufferOrErr = FileOutputBuffer::create(Filename, Sec.OriginalData.size()); if (!BufferOrErr) -return BufferOrErr.takeError(); +return createFileError(Filename, BufferOrErr.takeError()); std::unique_ptr Buf = std::move(*BufferOrErr); std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf->getBufferStart()); if (Error E = Buf->commit()) -return E; +return createFileError(Filename, std::move(E)); return Error::success(); } } - return createStringError(object_error::parse_failed, "section '%s' not found", - SecName.str().c_str()); + + return createFileError(InputFilename, object_error::parse_failed, + "section '%s' not found", SecName.str().c_str()); } Error Object::compressOrDecompressSections(const CommonConfig &Config) { @@ -798,7 +799,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, StringRef SectionName; StringRef FileName; std::tie(SectionName, FileName) = Flag.split('='); -if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) +if (Error E = +dumpSectionToFile(SectionName, FileName, Config.InputFilename, Obj)) return E; } @@ -807,10 +809,10 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, // us to avoid reporting the inappropriate errors about removing symbols // named in relocations. if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj)) -return E; +return createFileError(Config.InputFilename, std::move(E)); if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj)) -return E; +return createFileError(Config.InputFilename, std::move(E)); if (!Config.SetSectionAlignment.empty()) { for (SectionBase &Sec : Obj.sections()) { @@ -826,8 +828,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, if (Config.ChangeSectionLMAValAll > 0 && Seg.PAddr > std::numeric_limits::max() - Config.ChangeSectionLMAValAll) { - return createStringError( - errc::invalid_argument, + return createFileError( + Config.InputFilename, errc::invalid_argument, "address 0x" + Twine::utohexstr(Seg.PAddr) + " cannot be increased by 0x" + Twine::utohexstr(Config.ChangeSectionLMAValAll) + @@ -835,8 +837,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, } else if (Config.ChangeSectionLMAValAll < 0 && Seg.PAddr < std::numeric_limits::min() - Config.ChangeSectionLMAValAll) { - return createStringError( - errc::invalid_argument, + return createFileError( + Config.InputFilename, errc::invalid_argument, "a
[llvm-branch-commits] [llvm] 5f2d66a - [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345)
Author: Amr Hesham Date: 2025-02-10T13:42:43-08:00 New Revision: 5f2d66a9d22537332a9630708b0a7602848babc6 URL: https://github.com/llvm/llvm-project/commit/5f2d66a9d22537332a9630708b0a7602848babc6 DIFF: https://github.com/llvm/llvm-project/commit/5f2d66a9d22537332a9630708b0a7602848babc6.diff LOG: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) Fix printing the correct file path in the error message when the output file specified by `--dump-section` cannot be opened Fixes: #125113 on ELF, MachO, Wasm (cherry picked from commit 66bea0df75ccdd5ffed41d06c7301a116d11abcb) Added: Modified: llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp llvm/test/tools/llvm-objcopy/ELF/dump-section.test llvm/test/tools/llvm-objcopy/MachO/dump-section.test llvm/test/tools/llvm-objcopy/wasm/dump-section.test Removed: diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index 5aa0079f3fbc7a7..9c78f7433ad3390 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -186,27 +186,28 @@ static std::unique_ptr createWriter(const CommonConfig &Config, } static Error dumpSectionToFile(StringRef SecName, StringRef Filename, - Object &Obj) { + StringRef InputFilename, Object &Obj) { for (auto &Sec : Obj.sections()) { if (Sec.Name == SecName) { if (Sec.Type == SHT_NOBITS) -return createStringError(object_error::parse_failed, - "cannot dump section '%s': it has no contents", - SecName.str().c_str()); +return createFileError(InputFilename, object_error::parse_failed, + "cannot dump section '%s': it has no contents", + SecName.str().c_str()); Expected> BufferOrErr = FileOutputBuffer::create(Filename, Sec.OriginalData.size()); if (!BufferOrErr) -return BufferOrErr.takeError(); +return createFileError(Filename, BufferOrErr.takeError()); std::unique_ptr Buf = std::move(*BufferOrErr); std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf->getBufferStart()); if (Error E = Buf->commit()) -return E; +return createFileError(Filename, std::move(E)); return Error::success(); } } - return createStringError(object_error::parse_failed, "section '%s' not found", - SecName.str().c_str()); + + return createFileError(InputFilename, object_error::parse_failed, + "section '%s' not found", SecName.str().c_str()); } Error Object::compressOrDecompressSections(const CommonConfig &Config) { @@ -798,7 +799,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, StringRef SectionName; StringRef FileName; std::tie(SectionName, FileName) = Flag.split('='); -if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) +if (Error E = +dumpSectionToFile(SectionName, FileName, Config.InputFilename, Obj)) return E; } @@ -807,10 +809,10 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, // us to avoid reporting the inappropriate errors about removing symbols // named in relocations. if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj)) -return E; +return createFileError(Config.InputFilename, std::move(E)); if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj)) -return E; +return createFileError(Config.InputFilename, std::move(E)); if (!Config.SetSectionAlignment.empty()) { for (SectionBase &Sec : Obj.sections()) { @@ -826,8 +828,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, if (Config.ChangeSectionLMAValAll > 0 && Seg.PAddr > std::numeric_limits::max() - Config.ChangeSectionLMAValAll) { - return createStringError( - errc::invalid_argument, + return createFileError( + Config.InputFilename, errc::invalid_argument, "address 0x" + Twine::utohexstr(Seg.PAddr) + " cannot be increased by 0x" + Twine::utohexstr(Config.ChangeSectionLMAValAll) + @@ -835,8 +837,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, } else if (Config.ChangeSectionLMAValAll < 0 && Seg.PAddr < std::numeric_limits::min() - Config.ChangeSectionLMAValAll) { - return createStringError( - errc::invalid_argument, + return createFileError( +
[llvm-branch-commits] [llvm] ed762db - [LLVM][Support] Add new CreateFileError functions (#125906)
Author: Amr Hesham Date: 2025-02-10T13:42:43-08:00 New Revision: ed762db1e0088a0ad5c7d72e8ad2b08a5b1cf1be URL: https://github.com/llvm/llvm-project/commit/ed762db1e0088a0ad5c7d72e8ad2b08a5b1cf1be DIFF: https://github.com/llvm/llvm-project/commit/ed762db1e0088a0ad5c7d72e8ad2b08a5b1cf1be.diff LOG: [LLVM][Support] Add new CreateFileError functions (#125906) Add new CreateFileError functions to create a StringError with the specified error code and prepend the file path to it Needed for: #125345 (cherry picked from commit 2464f4ba6e0e50bb30c31b6526fa0bdd5a531217) Added: Modified: llvm/include/llvm/Support/Error.h llvm/unittests/Support/ErrorTest.cpp Removed: diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index 90120156ec2ead1..c1b809a09bb80e1 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -1404,6 +1404,23 @@ inline Error createFileError(const Twine &F, size_t Line, std::error_code EC) { return createFileError(F, Line, errorCodeToError(EC)); } +/// Create a StringError with the specified error code and prepend the file path +/// to it. +inline Error createFileError(const Twine &F, std::error_code EC, + const Twine &S) { + Error E = createStringError(EC, S); + return createFileError(F, std::move(E)); +} + +/// Create a StringError with the specified error code and prepend the file path +/// to it. +template +inline Error createFileError(const Twine &F, std::error_code EC, + char const *Fmt, const Ts &...Vals) { + Error E = createStringError(EC, Fmt, Vals...); + return createFileError(F, std::move(E)); +} + Error createFileError(const Twine &F, ErrorSuccess) = delete; /// Helper for check-and-exit error handling. diff --git a/llvm/unittests/Support/ErrorTest.cpp b/llvm/unittests/Support/ErrorTest.cpp index 98d19e8d2a15a3d..00c562ecc059d37 100644 --- a/llvm/unittests/Support/ErrorTest.cpp +++ b/llvm/unittests/Support/ErrorTest.cpp @@ -976,6 +976,17 @@ TEST(Error, FileErrorTest) { handleAllErrors(std::move(FE6), [](std::unique_ptr F) { EXPECT_EQ(F->messageWithoutFileInfo(), "CustomError {6}"); }); + + Error FE7 = + createFileError("file.bin", make_error_code(std::errc::invalid_argument), + "invalid argument"); + EXPECT_EQ(toString(std::move(FE7)), "'file.bin': invalid argument"); + + StringRef Argument = "arg"; + Error FE8 = + createFileError("file.bin", make_error_code(std::errc::invalid_argument), + "invalid argument '%s'", Argument.str().c_str()); + EXPECT_EQ(toString(std::move(FE8)), "'file.bin': invalid argument 'arg'"); } TEST(Error, FileErrorErrorCode) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/126367 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [llvm-objcopy] Fix prints wrong path when dump-section output path doesn't exist (#125345) (PR #126367)
github-actions[bot] wrote: @AmrDeveloper (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/126367 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/126518 >From f0f59e3ecc17ba60ebf5c45cc0564628959466f8 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 10 Feb 2025 09:00:31 -0500 Subject: [PATCH] [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) Also move the -fno-strict-overflow option definition next to the -fstrict-overflow one while here. Also add test coverage for f(no-)wrapv-pointer being a clang-cl option. (cherry picked from commit 71adb054024a1e9bd5ed4566beda74dea65362cd) --- clang/include/clang/Driver/Options.td | 6 +++--- clang/test/Driver/cl-options.c| 4 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a2b47b943ef90dd..02e5c4cbb4bff90 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3505,8 +3505,6 @@ def fno_strict_aliasing : Flag<["-"], "fno-strict-aliasing">, Group, def fstruct_path_tbaa : Flag<["-"], "fstruct-path-tbaa">, Group; def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; -def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero", PosFlag, @@ -4023,7 +4021,9 @@ defm strict_vtable_pointers : BoolFOption<"strict-vtable-pointers", " overwriting polymorphic C++ objects">, NegFlag>; def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; + Visibility<[ClangOption, CLOption, FlangOption]>; +def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, + Visibility<[ClangOption, CLOption, FlangOption]>; def fpointer_tbaa : Flag<["-"], "fpointer-tbaa">, Group; def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>, diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 29a0fcbc17ac603..9f9ca1bf1a8fdc4 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -738,9 +738,13 @@ // RUN: -fno-modules-search-all \ // RUN: -fimplicit-modules \ // RUN: -fno-implicit-modules \ +// RUN: -fstrict-overflow \ +// RUN: -fno-strict-overflow \ // RUN: -ftrivial-auto-var-init=zero \ // RUN: -fwrapv \ // RUN: -fno-wrapv \ +// RUN: -fwrapv-pointer \ +// RUN: -fno-wrapv-pointer \ // RUN: --version \ // RUN: -Werror /Zs -- %s 2>&1 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] f0f59e3 - [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512)
Author: Nico Weber Date: 2025-02-10T13:47:49-08:00 New Revision: f0f59e3ecc17ba60ebf5c45cc0564628959466f8 URL: https://github.com/llvm/llvm-project/commit/f0f59e3ecc17ba60ebf5c45cc0564628959466f8 DIFF: https://github.com/llvm/llvm-project/commit/f0f59e3ecc17ba60ebf5c45cc0564628959466f8.diff LOG: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) Also move the -fno-strict-overflow option definition next to the -fstrict-overflow one while here. Also add test coverage for f(no-)wrapv-pointer being a clang-cl option. (cherry picked from commit 71adb054024a1e9bd5ed4566beda74dea65362cd) Added: Modified: clang/include/clang/Driver/Options.td clang/test/Driver/cl-options.c Removed: diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a2b47b943ef90dd..02e5c4cbb4bff90 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3505,8 +3505,6 @@ def fno_strict_aliasing : Flag<["-"], "fno-strict-aliasing">, Group, def fstruct_path_tbaa : Flag<["-"], "fstruct-path-tbaa">, Group; def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; -def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero", PosFlag, @@ -4023,7 +4021,9 @@ defm strict_vtable_pointers : BoolFOption<"strict-vtable-pointers", " overwriting polymorphic C++ objects">, NegFlag>; def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group, - Visibility<[ClangOption, FlangOption]>; + Visibility<[ClangOption, CLOption, FlangOption]>; +def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, + Visibility<[ClangOption, CLOption, FlangOption]>; def fpointer_tbaa : Flag<["-"], "fpointer-tbaa">, Group; def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>, diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 29a0fcbc17ac603..9f9ca1bf1a8fdc4 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -738,9 +738,13 @@ // RUN: -fno-modules-search-all \ // RUN: -fimplicit-modules \ // RUN: -fno-implicit-modules \ +// RUN: -fstrict-overflow \ +// RUN: -fno-strict-overflow \ // RUN: -ftrivial-auto-var-init=zero \ // RUN: -fwrapv \ // RUN: -fno-wrapv \ +// RUN: -fwrapv-pointer \ +// RUN: -fno-wrapv-pointer \ // RUN: --version \ // RUN: -Werror /Zs -- %s 2>&1 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Expose -f(no-)strict-overflow as a clang-cl option (#126512) (PR #126518)
github-actions[bot] wrote: @nico (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/126518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [PAC] Do not support some values of branch-protection with ptrauth-returns (#125280) (PR #126589)
llvmbot wrote: @llvm/pr-subscribers-backend-arm Author: None (llvmbot) Changes Backport 84b0c128a751acfbf5b439edc724ba27d1da653e Requested by: @asl --- Full diff: https://github.com/llvm/llvm-project/pull/126589.diff 11 Files Affected: - (modified) clang/include/clang/Basic/TargetInfo.h (+1) - (modified) clang/lib/Basic/Targets/AArch64.cpp (+8) - (modified) clang/lib/Basic/Targets/AArch64.h (+1) - (modified) clang/lib/Basic/Targets/ARM.cpp (+1) - (modified) clang/lib/Basic/Targets/ARM.h (+1) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+2-2) - (modified) clang/lib/CodeGen/Targets/ARM.cpp (+2-2) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+24-22) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+2-1) - (modified) clang/test/Driver/aarch64-ptrauth.c (+18-9) - (added) clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c (+31) ``diff diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index d762144478b489d..1a8398d449cd2c6 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1469,6 +1469,7 @@ class TargetInfo : public TransferrableTargetInfo, /// specification virtual bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const { Err = ""; return false; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 57c9849ef2a7287..cabda0a1323a3c0 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -253,11 +253,19 @@ bool AArch64TargetInfo::validateGlobalRegisterVariable( bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef, BranchProtectionInfo &BPI, + const LangOptions &LO, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err, HasPAuthLR)) return false; + // GCS is currently untested with ptrauth-returns, but enabling this could be + // allowed in future after testing with a suitable system. + if (LO.PointerAuthReturns && + (PBP.Scope != "none" || PBP.BranchProtectionPAuthLR || + PBP.GuardedControlStack)) +return false; + BPI.SignReturnAddr = llvm::StringSwitch(PBP.Scope) .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf) diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index b75d2a9dc8ecadc..527e49d63512c31 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -132,6 +132,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const override; bool isValidCPUName(StringRef Name) const override; diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 0fd5433a76402ef..aad05052bcf06b6 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -405,6 +405,7 @@ bool ARMTargetInfo::isBranchProtectionSupportedArch(StringRef Arch) const { bool ARMTargetInfo::validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, + const LangOptions &LO, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err)) diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index fdb40c3d41918aa..a42362724b65431 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -155,6 +155,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { bool isBranchProtectionSupportedArch(StringRef Arch) const override; bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const override; // FIXME: This should be based on Arch attributes, not CPU names. diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 057199c66f5a103..170ce1640367a92 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -147,8 +147,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { CGM.getTarget().parseTa
[llvm-branch-commits] [clang] release/20.x: [PAC] Do not support some values of branch-protection with ptrauth-returns (#125280) (PR #126589)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: None (llvmbot) Changes Backport 84b0c128a751acfbf5b439edc724ba27d1da653e Requested by: @asl --- Full diff: https://github.com/llvm/llvm-project/pull/126589.diff 11 Files Affected: - (modified) clang/include/clang/Basic/TargetInfo.h (+1) - (modified) clang/lib/Basic/Targets/AArch64.cpp (+8) - (modified) clang/lib/Basic/Targets/AArch64.h (+1) - (modified) clang/lib/Basic/Targets/ARM.cpp (+1) - (modified) clang/lib/Basic/Targets/ARM.h (+1) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+2-2) - (modified) clang/lib/CodeGen/Targets/ARM.cpp (+2-2) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+24-22) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+2-1) - (modified) clang/test/Driver/aarch64-ptrauth.c (+18-9) - (added) clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c (+31) ``diff diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index d762144478b489d..1a8398d449cd2c6 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1469,6 +1469,7 @@ class TargetInfo : public TransferrableTargetInfo, /// specification virtual bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const { Err = ""; return false; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 57c9849ef2a7287..cabda0a1323a3c0 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -253,11 +253,19 @@ bool AArch64TargetInfo::validateGlobalRegisterVariable( bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef, BranchProtectionInfo &BPI, + const LangOptions &LO, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err, HasPAuthLR)) return false; + // GCS is currently untested with ptrauth-returns, but enabling this could be + // allowed in future after testing with a suitable system. + if (LO.PointerAuthReturns && + (PBP.Scope != "none" || PBP.BranchProtectionPAuthLR || + PBP.GuardedControlStack)) +return false; + BPI.SignReturnAddr = llvm::StringSwitch(PBP.Scope) .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf) diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index b75d2a9dc8ecadc..527e49d63512c31 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -132,6 +132,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const override; bool isValidCPUName(StringRef Name) const override; diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 0fd5433a76402ef..aad05052bcf06b6 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -405,6 +405,7 @@ bool ARMTargetInfo::isBranchProtectionSupportedArch(StringRef Arch) const { bool ARMTargetInfo::validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, + const LangOptions &LO, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err)) diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index fdb40c3d41918aa..a42362724b65431 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -155,6 +155,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { bool isBranchProtectionSupportedArch(StringRef Arch) const override; bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const override; // FIXME: This should be based on Arch attributes, not CPU names. diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 057199c66f5a103..170ce1640367a92 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -147,8 +147,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { CGM.getTarget().parse
[llvm-branch-commits] [clang] release/20.x: [PAC] Do not support some values of branch-protection with ptrauth-returns (#125280) (PR #126589)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 84b0c128a751acfbf5b439edc724ba27d1da653e Requested by: @asl --- Full diff: https://github.com/llvm/llvm-project/pull/126589.diff 11 Files Affected: - (modified) clang/include/clang/Basic/TargetInfo.h (+1) - (modified) clang/lib/Basic/Targets/AArch64.cpp (+8) - (modified) clang/lib/Basic/Targets/AArch64.h (+1) - (modified) clang/lib/Basic/Targets/ARM.cpp (+1) - (modified) clang/lib/Basic/Targets/ARM.h (+1) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+2-2) - (modified) clang/lib/CodeGen/Targets/ARM.cpp (+2-2) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+24-22) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+2-1) - (modified) clang/test/Driver/aarch64-ptrauth.c (+18-9) - (added) clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c (+31) ``diff diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index d762144478b489d..1a8398d449cd2c6 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1469,6 +1469,7 @@ class TargetInfo : public TransferrableTargetInfo, /// specification virtual bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const { Err = ""; return false; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 57c9849ef2a7287..cabda0a1323a3c0 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -253,11 +253,19 @@ bool AArch64TargetInfo::validateGlobalRegisterVariable( bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef, BranchProtectionInfo &BPI, + const LangOptions &LO, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err, HasPAuthLR)) return false; + // GCS is currently untested with ptrauth-returns, but enabling this could be + // allowed in future after testing with a suitable system. + if (LO.PointerAuthReturns && + (PBP.Scope != "none" || PBP.BranchProtectionPAuthLR || + PBP.GuardedControlStack)) +return false; + BPI.SignReturnAddr = llvm::StringSwitch(PBP.Scope) .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf) diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index b75d2a9dc8ecadc..527e49d63512c31 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -132,6 +132,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const override; bool isValidCPUName(StringRef Name) const override; diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 0fd5433a76402ef..aad05052bcf06b6 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -405,6 +405,7 @@ bool ARMTargetInfo::isBranchProtectionSupportedArch(StringRef Arch) const { bool ARMTargetInfo::validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, + const LangOptions &LO, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err)) diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index fdb40c3d41918aa..a42362724b65431 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -155,6 +155,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { bool isBranchProtectionSupportedArch(StringRef Arch) const override; bool validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, +const LangOptions &LO, StringRef &Err) const override; // FIXME: This should be based on Arch attributes, not CPU names. diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 057199c66f5a103..170ce1640367a92 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -147,8 +147,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { CGM.getTarget().parseTargetAt
[llvm-branch-commits] [llvm] release/20.x: [CG][RISCV]Fix shuffling of odd number of input vectors (PR #125910)
https://github.com/topperc approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/125910 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f7c7db9 - [VPlan] Check VPWidenIntrinsicSC in VPRecipeWithIRFlags::classof.
Author: Florian Hahn Date: 2025-02-10T13:13:26-08:00 New Revision: f7c7db9b6fb14464d8ac6e224b801f6de81638d9 URL: https://github.com/llvm/llvm-project/commit/f7c7db9b6fb14464d8ac6e224b801f6de81638d9 DIFF: https://github.com/llvm/llvm-project/commit/f7c7db9b6fb14464d8ac6e224b801f6de81638d9.diff LOG: [VPlan] Check VPWidenIntrinsicSC in VPRecipeWithIRFlags::classof. When VPWidenIntrinsicRecipe was changed to inhert from VPRecipeWithIRFlags, VPRecipeWithIRFlags::classof wasn't updated accordingly. Also check for VPWidenIntrinsicSC in VPRecipeWithIRFlags::classof. Fixes https://github.com/llvm/llvm-project/issues/125301. (cherry picked from commit 75b922dccfc35ec25a520b1941e6682a300802b8) Added: llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll Modified: llvm/lib/Transforms/Vectorize/VPlan.h Removed: diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a1ff684b2b80175..6c95b08a0201461 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1058,6 +1058,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { R->getVPDefID() == VPRecipeBase::VPWidenEVLSC || R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || + R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC || R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll new file mode 100644 index 000..53bd2d119c1ae40 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -mcpu=neoverse-v2 -force-vector-width=4 -S %s | FileCheck %s + +target triple = "aarch64-unknown-linux" + +; Test case where we visit a VPWidenIntrinsic (for @llvm.fabs) with nnan flags. +; For https://github.com/llvm/llvm-project/issues/125301. +define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %dst.1, ptr noalias %src.1, ptr %src.2) { +; CHECK-LABEL: define void @check_widen_intrinsic_with_nnan( +; CHECK-SAME: ptr noalias [[DST_0:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[SRC_1:%.*]], ptr [[SRC_2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT:br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT:br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] +; CHECK-NEXT:[[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT:[[TMP1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[TMP0]] +; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 +; CHECK-NEXT:[[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8 +; CHECK-NEXT:[[TMP3:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[WIDE_LOAD]]) +; CHECK-NEXT:[[TMP4:%.*]] = fcmp olt <4 x double> [[TMP3]], splat (double 1.00e+00) +; CHECK-NEXT:[[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT:[[TMP6:%.*]] = add i64 [[TMP0]], -1 +; CHECK-NEXT:[[TMP7:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP6]] +; CHECK-NEXT:[[TMP8:%.*]] = getelementptr double, ptr [[TMP7]], i32 0 +; CHECK-NEXT:call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP8]], i32 8, <4 x i1> [[TMP5]]) +; CHECK-NEXT:[[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT:br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT:[[TMP10:%.*]] = load double, ptr [[SRC_2]], align 8 +; CHECK-NEXT:[[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 +; CHECK-NEXT:br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT:[[TMP12:%.*]] = phi <4 x double> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT:[[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT:br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT:[[TMP14:%.*]] = load double, ptr [[SRC_2]], align 8 +; CHECK-NEXT:[[TMP15:%.*]] = insertelement <4 x double> [[TMP12]], double [[TMP14]], i32 1 +; CHECK-NEXT:br label %[[PRED_LOAD_CONTIN
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Replace SELRMux with COPY in case of identical operands. (#125108) (PR #125236)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/125236 >From 5b48526fe347235fd6ac8ecc51928749b0e5deda Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 31 Jan 2025 06:58:01 -0600 Subject: [PATCH] [SystemZ] Replace SELRMux with COPY in case of identical operands. (#125108) If both operands of a SELRMux use the same register which is killed, and the SELRMux is expanded to a jump sequence, a broken MIR results if the kill flag is not removed. This patch replaces the SELRMux with a COPY in these cases. (cherry picked from commit eb1a571114a799f532a12b2f062746d3b92fed88) --- .../lib/Target/SystemZ/SystemZPostRewrite.cpp | 12 +++ llvm/test/CodeGen/SystemZ/cond-move-10.mir| 21 +++ 2 files changed, 33 insertions(+) create mode 100644 llvm/test/CodeGen/SystemZ/cond-move-10.mir diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp index e15f9027cc20956..cf3073f0f209048 100644 --- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -107,6 +107,18 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, bool Src1IsHigh = SystemZ::isHighReg(Src1Reg); bool Src2IsHigh = SystemZ::isHighReg(Src2Reg); + // In rare cases both sources are the same register (after + // machine-cse). This must be handled as it may lead to wrong-code (after + // machine-cp) if the kill flag on Src1 isn't cleared (with + // expandCondMove()). + if (Src1Reg == Src2Reg) { +BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), +TII->get(SystemZ::COPY), DestReg) +.addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); +MBBI->eraseFromParent(); +return; + } + // If sources and destination aren't all high or all low, we may be able to // simplify the operation by moving one of the sources to the destination // first. But only if this doesn't clobber the other source. diff --git a/llvm/test/CodeGen/SystemZ/cond-move-10.mir b/llvm/test/CodeGen/SystemZ/cond-move-10.mir new file mode 100644 index 000..1db960829729eab --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-10.mir @@ -0,0 +1,21 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z15 -run-pass=systemz-post-rewrite \ +# RUN: 2>&1 | FileCheck %s + +# The SELRMux has two identical sources - replace with a copy instruction. +# CHECK: name: fun0 +# CHECK: renamable $r1l = AHIMuxK killed renamable $r1l, -1, implicit-def dead $cc +# CHECK-NEXT: CHIMux renamable $r5h, 9, implicit-def $cc +# CHECK-NEXT: $r14h = COPY killed renamable $r1l +--- +name:fun0 +tracksRegLiveness: true +body: | + bb.0: +liveins: $r1l, $r5h +renamable $r1l = AHIMuxK killed renamable $r1l, -1, implicit-def dead $cc +CHIMux renamable $r5h, 9, implicit-def $cc +renamable $r14h = SELRMux killed renamable $r1l, renamable $r1l, 14, 8, implicit $cc +$r14l = COPY killed renamable $r14h +$r14d = LGFR $r14l +Return implicit $r14d +... ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits