[llvm-branch-commits] [CodeGen] Add dump() to MachineTraceMetrics.h (PR #97799)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/97799
[llvm-branch-commits] [llvm] release/19.x: [Metadata] Try to merge the first and last ranges. (#101860) (PR #101875)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/101875
[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/104745
[llvm-branch-commits] [RISCV] Support llvm.readsteadycounter intrinsic (PR #82322)
https://github.com/dtcxzyw edited https://github.com/llvm/llvm-project/pull/82322
[llvm-branch-commits] [RISCV] Support llvm.readsteadycounter intrinsic (PR #82322)
@@ -11725,13 +11727,27 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Result); break; } - case ISD::READCYCLECOUNTER: { -assert(!Subtarget.is64Bit() && - "READCYCLECOUNTER only has custom type legalization on riscv32"); + case ISD::READCYCLECOUNTER: + case ISD::READSTEADYCOUNTER: { +assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only " + "has custom type legalization on riscv32"); +SDValue LoCounter, HiCounter; +MVT XLenVT = Subtarget.getXLenVT(); +if (N->getOpcode() == ISD::READCYCLECOUNTER) { + LoCounter = DAG.getConstant( + RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT); + HiCounter = DAG.getConstant( + RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT); +} else if (N->getOpcode() == ISD::READSTEADYCOUNTER) { dtcxzyw wrote: ```suggestion } else { ``` https://github.com/llvm/llvm-project/pull/82322 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [RISCV] Support llvm.readsteadycounter intrinsic (PR #82322)
https://github.com/dtcxzyw approved this pull request. LGTM. Thanks! https://github.com/llvm/llvm-project/pull/82322
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fold gep of exact unsigned division (#82334) (PR #82347)
https://github.com/dtcxzyw approved this pull request. LGTM if CI is happy. https://github.com/llvm/llvm-project/pull/82347
[llvm-branch-commits] [llvm] [RISCV] Support select optimization (PR #80124)
https://github.com/dtcxzyw requested changes to this pull request. https://github.com/llvm/llvm-project/pull/80124
[llvm-branch-commits] [llvm] [RISCV] Support select optimization (PR #80124)
@@ -451,6 +456,9 @@ void RISCVPassConfig::addIRPasses() { } TargetPassConfig::addIRPasses(); + + if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt) dtcxzyw wrote: > Failed Tests (1): LLVM :: CodeGen/RISCV/O3-pipeline.ll Please update the test. https://github.com/llvm/llvm-project/pull/80124
[llvm-branch-commits] [llvm] [RISCV] Support select optimization (PR #80124)
https://github.com/dtcxzyw edited https://github.com/llvm/llvm-project/pull/80124
[llvm-branch-commits] [llvm] Backport #83980 to 18.x (PR #84023)
https://github.com/dtcxzyw milestoned https://github.com/llvm/llvm-project/pull/84023
[llvm-branch-commits] [llvm] Backport #83980 to 18.x (PR #84023)
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/84023 None >From 2d873aac49219cc84335fcf6a77329fb23d74679 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 5 Mar 2024 17:21:16 +0800 Subject: [PATCH] [InstCombine] Handle scalable splat in `getFlippedStrictnessPredicateAndConstant` --- .../InstCombine/InstCombineCompares.cpp | 7 +++ llvm/test/Transforms/InstCombine/pr83931.ll | 15 +++ llvm/test/Transforms/InstCombine/select.ll| 2 +- llvm/test/Transforms/InstCombine/vscale_cmp.ll| 2 +- 4 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/pr83931.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8c0fd662255130..9973a80a7db946 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6491,6 +6491,13 @@ InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, if (!SafeReplacementConstant) SafeReplacementConstant = CI; } + } else if (isa(C->getType())) { +// Handle scalable splat +Value *SplatC = C->getSplatValue(); +auto *CI = dyn_cast_or_null(SplatC); +// Bail out if the constant can't be safely incremented/decremented. +if (!CI || !ConstantIsOk(CI)) + return std::nullopt; } else { // ConstantExpr? return std::nullopt; diff --git a/llvm/test/Transforms/InstCombine/pr83931.ll b/llvm/test/Transforms/InstCombine/pr83931.ll new file mode 100644 index 00..d36ac8d91abd30 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr83931.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define @dont_crash( %x) { +; CHECK-LABEL: define @dont_crash( +; CHECK-SAME: [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[RET:%.*]] = icmp sgt [[X]], shufflevector ( insertelement ( poison, i64 -309383, i64 0), poison, zeroinitializer) +; CHECK-NEXT:ret [[RET]] +; +entry: + %div = sdiv %x, splat (i64 309383) + %ret = icmp sge %div, zeroinitializer + ret %ret +} diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index c5f1b77c6d7404..d859ad357b6f50 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -3423,7 +3423,7 @@ define @scalable_sign_bits( %x) { define @scalable_non_zero( %x) { ; CHECK-LABEL: @scalable_non_zero( ; CHECK-NEXT:[[A:%.*]] = or [[X:%.*]], shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT:[[CMP:%.*]] = icmp ule [[A]], shufflevector ( insertelement ( poison, i32 56, i64 0), poison, zeroinitializer) +; CHECK-NEXT:[[CMP:%.*]] = icmp ult [[A]], shufflevector ( insertelement ( poison, i32 57, i64 0), poison, zeroinitializer) ; CHECK-NEXT:ret [[CMP]] ; %a = or %x, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/InstCombine/vscale_cmp.ll b/llvm/test/Transforms/InstCombine/vscale_cmp.ll index a7f8368c5d62c8..b2bfc93da089fc 100644 --- a/llvm/test/Transforms/InstCombine/vscale_cmp.ll +++ b/llvm/test/Transforms/InstCombine/vscale_cmp.ll @@ -3,7 +3,7 @@ define @sge( %x) { ; CHECK-LABEL: @sge( -; CHECK-NEXT:[[CMP:%.*]] = icmp sge [[X:%.*]], zeroinitializer +; CHECK-NEXT:[[CMP:%.*]] = icmp sgt [[X:%.*]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer) ; CHECK-NEXT:ret [[CMP]] ; %cmp = 
icmp sge %x, zeroinitializer
[llvm-branch-commits] [llvm] Backport #83980 to 18.x (PR #84023)
https://github.com/dtcxzyw edited https://github.com/llvm/llvm-project/pull/84023
[llvm-branch-commits] [llvm] Backport #83980 to 18.x (PR #84023)
https://github.com/dtcxzyw ready_for_review https://github.com/llvm/llvm-project/pull/84023
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix infinite loop in select equivalence fold (#84036) (PR #84141)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/84141
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix miscompilation in PR83947 (#83993) (PR #84021)
dtcxzyw wrote: @nikic Do you know how to add a commit to this PR? ``` diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 2704905f7a35..c87c1199f727 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -292,7 +292,11 @@ entry: define void @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask(ptr %dst, i16 %val) { ; CHECK-LABEL: @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT:store i16 [[VAL:%.*]], ptr [[DST:%.*]], align 2 +; CHECK-NEXT:[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 +; CHECK-NEXT:[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT:[[BROADCAST_VALUE:%.*]] = insertelement poison, i16 [[VAL:%.*]], i64 0 +; CHECK-NEXT:[[BROADCAST_SPLATVALUE:%.*]] = shufflevector [[BROADCAST_VALUE]], poison, zeroinitializer +; CHECK-NEXT:call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[BROADCAST_SPLATVALUE]], [[BROADCAST_SPLAT]], i32 2, shufflevector ( insertelement ( zeroinitializer, i1 true, i32 0), zeroinitializer, zeroinitializer)) ; CHECK-NEXT:ret void ; entry: ``` It is a regression. But I think it should be fixed by https://github.com/llvm/llvm-project/commit/fd07b8f809eb64af9b29331ff6b94904b3159f84. https://github.com/llvm/llvm-project/pull/84021 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix miscompilation in PR83947 (#83993) (PR #84021)
dtcxzyw wrote:
> @nikic Do you know how to add a commit to this PR?
> [...]
> It is a regression. But I think it should be fixed by [fd07b8f](https://github.com/llvm/llvm-project/commit/fd07b8f809eb64af9b29331ff6b94904b3159f84).

I am trying to fix it in the GitHub codespace. https://github.com/llvm/llvm-project/pull/84021
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix miscompilation in PR83947 (#83993) (PR #84021)
dtcxzyw wrote: > remote: Permission to llvmbot/llvm-project.git denied to dtcxzyw. fatal: unable to access 'https://github.com/llvmbot/llvm-project/': The requested URL returned error: 403 https://github.com/llvm/llvm-project/pull/84021
[llvm-branch-commits] [llvm] Backport PR83993 to 18.x (PR #84298)
https://github.com/dtcxzyw milestoned https://github.com/llvm/llvm-project/pull/84298
[llvm-branch-commits] [llvm] Backport PR83993 to 18.x (PR #84298)
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/84298 Backport #83993 It is an alternative to #84021. >From 02e9b82d220961bc7a42295f051564a217144d4a Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 5 Mar 2024 22:34:04 +0800 Subject: [PATCH] [InstCombine] Fix miscompilation in PR83947 (#83993) https://github.com/llvm/llvm-project/blob/762f762504967efbe159db5c737154b989afc9bb/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp#L394-L407 Comment from @topperc: > This transforms assumes the mask is a non-zero splat. We only know its a splat and not provably all 0s. The mask is a constexpr that includes the address of the global variable. We can't resolve the constant expression to an exact value. Fixes #83947. --- llvm/include/llvm/Analysis/VectorUtils.h | 5 ++ llvm/lib/Analysis/VectorUtils.cpp | 25 +++ .../InstCombine/InstCombineCalls.cpp | 13 ++-- .../InstCombine/masked_intrinsics.ll | 6 +- llvm/test/Transforms/InstCombine/pr83947.ll | 67 +++ 5 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/pr83947.ll diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 7a92e62b53c53d..c6eb66cc9660ca 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -406,6 +406,11 @@ bool maskIsAllZeroOrUndef(Value *Mask); /// lanes can be assumed active. bool maskIsAllOneOrUndef(Value *Mask); +/// Given a mask vector of i1, Return true if any of the elements of this +/// predicate mask are known to be true or undef. That is, return true if at +/// least one lane can be assumed active. +bool maskContainsAllOneOrUndef(Value *Mask); + /// Given a mask vector of the form , return an APInt (of bitwidth Y) /// for each lane which may be active. APInt possiblyDemandedEltsInMask(Value *Mask); diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 73facc76a92b2c..bf7bc0ba84a033 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1012,6 +1012,31 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) { return true; } +bool llvm::maskContainsAllOneOrUndef(Value *Mask) { + assert(isa(Mask->getType()) && + isa(Mask->getType()->getScalarType()) && + cast(Mask->getType()->getScalarType())->getBitWidth() == + 1 && + "Mask must be a vector of i1"); + + auto *ConstMask = dyn_cast(Mask); + if (!ConstMask) +return false; + if (ConstMask->isAllOnesValue() || isa(ConstMask)) +return true; + if (isa(ConstMask->getType())) +return false; + for (unsigned + I = 0, + E = cast(ConstMask->getType())->getNumElements(); + I != E; ++I) { +if (auto *MaskElt = ConstMask->getAggregateElement(I)) + if (MaskElt->isAllOnesValue() || isa(MaskElt)) +return true; + } + return false; +} + /// TODO: This is a lot like known bits, but for /// vectors. Is there something we can common this with? 
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a647be2d26c761..bc43edb5e62065 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -412,11 +412,14 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) { // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) { - Align Alignment = cast(II.getArgOperand(2))->getAlignValue(); - StoreInst *S = - new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment); - S->copyMetadata(II); - return S; + if (maskContainsAllOneOrUndef(ConstMask)) { +Align Alignment = +cast(II.getArgOperand(2))->getAlignValue(); +StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, + Alignment); +S->copyMetadata(II); +return S; + } } // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, // lastlane), ptr diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 2704905f7a358d..c87c1199f727ea 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -292,7 +292,11 @@ entry: define void @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask(ptr %dst, i16 %val) { ; CHECK-LABEL: @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT:store i16 [[VAL:%.*]], ptr [[DST:%.*]], align 2 +; CHECK-NEXT:
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix miscompilation in PR83947 (#83993) (PR #84021)
https://github.com/dtcxzyw closed https://github.com/llvm/llvm-project/pull/84021
[llvm-branch-commits] [llvm] Backport PR83993 to 18.x (PR #84298)
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/84298 >From 02e9b82d220961bc7a42295f051564a217144d4a Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 5 Mar 2024 22:34:04 +0800 Subject: [PATCH] [InstCombine] Fix miscompilation in PR83947 (#83993) https://github.com/llvm/llvm-project/blob/762f762504967efbe159db5c737154b989afc9bb/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp#L394-L407 Comment from @topperc: > This transforms assumes the mask is a non-zero splat. We only know its a splat and not provably all 0s. The mask is a constexpr that includes the address of the global variable. We can't resolve the constant expression to an exact value. Fixes #83947. --- llvm/include/llvm/Analysis/VectorUtils.h | 5 ++ llvm/lib/Analysis/VectorUtils.cpp | 25 +++ .../InstCombine/InstCombineCalls.cpp | 13 ++-- .../InstCombine/masked_intrinsics.ll | 6 +- llvm/test/Transforms/InstCombine/pr83947.ll | 67 +++ 5 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/pr83947.ll diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 7a92e62b53c53d..c6eb66cc9660ca 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -406,6 +406,11 @@ bool maskIsAllZeroOrUndef(Value *Mask); /// lanes can be assumed active. bool maskIsAllOneOrUndef(Value *Mask); +/// Given a mask vector of i1, Return true if any of the elements of this +/// predicate mask are known to be true or undef. That is, return true if at +/// least one lane can be assumed active. +bool maskContainsAllOneOrUndef(Value *Mask); + /// Given a mask vector of the form , return an APInt (of bitwidth Y) /// for each lane which may be active. APInt possiblyDemandedEltsInMask(Value *Mask); diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 73facc76a92b2c..bf7bc0ba84a033 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1012,6 +1012,31 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) { return true; } +bool llvm::maskContainsAllOneOrUndef(Value *Mask) { + assert(isa(Mask->getType()) && + isa(Mask->getType()->getScalarType()) && + cast(Mask->getType()->getScalarType())->getBitWidth() == + 1 && + "Mask must be a vector of i1"); + + auto *ConstMask = dyn_cast(Mask); + if (!ConstMask) +return false; + if (ConstMask->isAllOnesValue() || isa(ConstMask)) +return true; + if (isa(ConstMask->getType())) +return false; + for (unsigned + I = 0, + E = cast(ConstMask->getType())->getNumElements(); + I != E; ++I) { +if (auto *MaskElt = ConstMask->getAggregateElement(I)) + if (MaskElt->isAllOnesValue() || isa(MaskElt)) +return true; + } + return false; +} + /// TODO: This is a lot like known bits, but for /// vectors. Is there something we can common this with? 
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a647be2d26c761..bc43edb5e62065 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -412,11 +412,14 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) { // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) { - Align Alignment = cast(II.getArgOperand(2))->getAlignValue(); - StoreInst *S = - new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment); - S->copyMetadata(II); - return S; + if (maskContainsAllOneOrUndef(ConstMask)) { +Align Alignment = +cast(II.getArgOperand(2))->getAlignValue(); +StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, + Alignment); +S->copyMetadata(II); +return S; + } } // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, // lastlane), ptr diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 2704905f7a358d..c87c1199f727ea 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -292,7 +292,11 @@ entry: define void @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask(ptr %dst, i16 %val) { ; CHECK-LABEL: @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT:store i16 [[VAL:%.*]], ptr [[DST:%.*]], align 2 +; CHECK-NEXT:[[BROADCAST_SPLATINSERT:%.*]] = insertelement p
[llvm-branch-commits] [llvm] release/18.x: [InstSimplify] Make sure the simplified value doesn't generate poison in threadBinOpOverSelect (#87075) (PR #88353)
dtcxzyw wrote:
> Hi @dtcxzyw (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one or two sentence description of the fix.

Fixed an incorrect poison-generating flag preservation in InstSimplify. This fixes a miscompilation on RISC-V, where an `or` carrying an incorrect `disjoint` flag was turned into an `add`. Is that OK?

https://github.com/llvm/llvm-project/pull/88353
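For readers unfamiliar with the flag, here is a minimal LLVM IR sketch (illustrative only, not taken from the PR; the function name is made up) of why `disjoint` is what justifies lowering an `or` as an `add`, and why preserving it incorrectly is a miscompile:

```llvm
; 'or disjoint' promises its operands share no set bits, so the backend
; may lower it as an addition: or disjoint 4, 3 == add 4, 3 == 7.
define i32 @disjoint_or_is_add(i32 %x) {
  %lo = and i32 %x, 7          ; only bits 0..2 can be set
  %r  = or disjoint i32 %lo, 8 ; bit 3 never overlaps %lo, so the flag holds
  ret i32 %r                   ; equivalent to add i32 %lo, 8
}
; If the flag does not actually hold, 'or' and 'add' disagree,
; e.g. or i32 1, 1 == 1 while add i32 1, 1 == 2, so a pass must drop
; 'disjoint' when it can no longer prove it for the rewritten operands.
```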
[llvm-branch-commits] [llvm] release/18.x: [RISCV][ISel] Fix types in `tryFoldSelectIntoOp` (#90659) (PR #90682)
dtcxzyw wrote: Fixed an incorrect type inference during RISC-V instruction selection, which caused an assertion failure when trying to fold selects into their operands. https://github.com/llvm/llvm-project/pull/90682
[llvm-branch-commits] [llvm] release/18.x: [FunctionAttrs] Fix incorrect nonnull inference for non-inbounds GEP (#91180) (PR #91286)
https://github.com/dtcxzyw approved this pull request. LGTM. https://github.com/llvm/llvm-project/pull/91286
[llvm-branch-commits] [llvm] release/18.x: [InstSimplify] Do not simplify freeze in `simplifyWithOpReplaced` (#91215) (PR #91419)
https://github.com/dtcxzyw edited https://github.com/llvm/llvm-project/pull/91419
[llvm-branch-commits] [llvm] release/18.x: [InstSimplify] Do not simplify freeze in `simplifyWithOpReplaced` (#91215) (PR #91419)
https://github.com/dtcxzyw requested changes to this pull request. https://github.com/llvm/llvm-project/pull/91419
[llvm-branch-commits] [llvm] release/18.x: [InstSimplify] Do not simplify freeze in `simplifyWithOpReplaced` (#91215) (PR #91419)
@@ -3708,3 +3708,91 @@ define i32 @src_select_xxory_eq0_xorxy_y(i32 %x, i32 %y) { %cond = select i1 %xor0, i32 %xor, i32 %y ret i32 %cond } + +define i32 @sequence_select_with_same_cond_false(i1 %c1, i1 %c2){ dtcxzyw wrote: These tests don't belong to the original patch. https://github.com/llvm/llvm-project/pull/91419
[llvm-branch-commits] [llvm] release/18.x: [InstSimplify] Do not simplify freeze in `simplifyWithOpReplaced` (#91215) (PR #91419)
dtcxzyw wrote: @AtariDreams Please don't rebase your patch unless there are conflicts to resolve. At the very least, you should tell us what you changed. https://github.com/llvm/llvm-project/pull/91419
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Drop nuw flag when CtlzOp is a sub nuw (#91776) (PR #91917)
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { ret <4 x i32> %sel } +define i32 @pr91691(i32 %0) { +; CHECK-LABEL: @pr91691( +; CHECK-NEXT:[[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]] +; CHECK-NEXT:[[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) +; CHECK-NEXT:[[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] +; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], 31 +; CHECK-NEXT:[[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] +; CHECK-NEXT:ret i32 [[TMP6]] +; + %2 = sub nuw i32 -2, %0 + %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) + %4 = sub i32 32, %3 + %5 = shl i32 1, %4 + %6 = icmp ult i32 %0, -2 + %7 = select i1 %6, i32 %5, i32 1 + ret i32 %7 +} + +define i32 @pr91691_keep_nsw(i32 %0) { +; CHECK-LABEL: @pr91691_keep_nsw( +; CHECK-NEXT:[[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]] +; CHECK-NEXT:[[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) +; CHECK-NEXT:[[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] +; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], 31 +; CHECK-NEXT:[[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] +; CHECK-NEXT:ret i32 [[TMP6]] +; + %2 = sub nsw i32 -2, %0 + %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) + %4 = sub i32 32, %3 + %5 = shl i32 1, %4 + %6 = icmp ult i32 %0, -2 + %7 = select i1 %6, i32 %5, i32 1 + ret i32 %7 +} + dtcxzyw wrote: ```suggestion define i32 @pr91691(i32 %0) { ; CHECK-LABEL: @pr91691( ; CHECK-NEXT:[[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]] ; CHECK-NEXT:[[TMP3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false), !range [[RNG0]] ; CHECK-NEXT:[[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] ; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], 31 ; CHECK-NEXT:[[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] ; CHECK-NEXT:ret i32 [[TMP6]] ; %2 = sub nuw i32 -2, %0 %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) %4 = sub i32 32, %3 %5 = shl i32 1, %4 %6 = icmp ult i32 %0, -2 %7 = select i1 %6, i32 %5, i32 1 ret i32 %7 } define i32 @pr91691_keep_nsw(i32 %0) { ; CHECK-LABEL: @pr91691_keep_nsw( ; CHECK-NEXT:[[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]] ; CHECK-NEXT:[[TMP3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false), !range [[RNG0]] ; CHECK-NEXT:[[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] ; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], 31 ; CHECK-NEXT:[[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] ; CHECK-NEXT:ret i32 [[TMP6]] ; %2 = sub nsw i32 -2, %0 %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) %4 = sub i32 32, %3 %5 = shl i32 1, %4 %6 = icmp ult i32 %0, -2 %7 = select i1 %6, i32 %5, i32 1 ret i32 %7 } ``` https://github.com/llvm/llvm-project/pull/91917 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Drop nuw flag when CtlzOp is a sub nuw (#91776) (PR #91917)
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { ret <4 x i32> %sel } +define i32 @pr91691(i32 %0) { +; CHECK-LABEL: @pr91691( +; CHECK-NEXT:[[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]] +; CHECK-NEXT:[[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) +; CHECK-NEXT:[[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] +; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], 31 +; CHECK-NEXT:[[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] +; CHECK-NEXT:ret i32 [[TMP6]] +; + %2 = sub nuw i32 -2, %0 + %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) + %4 = sub i32 32, %3 + %5 = shl i32 1, %4 + %6 = icmp ult i32 %0, -2 + %7 = select i1 %6, i32 %5, i32 1 + ret i32 %7 +} + +define i32 @pr91691_keep_nsw(i32 %0) { +; CHECK-LABEL: @pr91691_keep_nsw( +; CHECK-NEXT:[[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]] +; CHECK-NEXT:[[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) +; CHECK-NEXT:[[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] +; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], 31 +; CHECK-NEXT:[[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] +; CHECK-NEXT:ret i32 [[TMP6]] +; + %2 = sub nsw i32 -2, %0 + %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) + %4 = sub i32 32, %3 + %5 = shl i32 1, %4 + %6 = icmp ult i32 %0, -2 + %7 = select i1 %6, i32 %5, i32 1 + ret i32 %7 +} + dtcxzyw wrote: @tstellar @nikic Do you guys have write access to this PR to directly commit these changes? https://github.com/llvm/llvm-project/pull/91917 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [libc] [compiler-rt] [libcxx] [clang] [llvm] [RISCV] Support select optimization (PR #80124)
@@ -0,0 +1,873 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -select-optimize -mtriple=riscv64 -S < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-SELECT +; RUN: opt -select-optimize -mtriple=riscv64 -mattr=+enable-select-opt -S < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-BRANCH +; RUN: opt -select-optimize -mtriple=riscv64 -mattr=+enable-select-opt,+predictable-select-expensive -S < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-BRANCH + +%struct.st = type { i32, i64, ptr, ptr, i16, ptr, ptr, i64, i64 } + +; This test has a select at the end of if.then, which is better transformed to a branch on OoO cores. + +define void @replace(ptr nocapture noundef %newst, ptr noundef %t, ptr noundef %h, i64 noundef %c, i64 noundef %rc, i64 noundef %ma, i64 noundef %n) { dtcxzyw wrote: Could you please reduce the test? https://github.com/llvm/llvm-project/pull/80124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [ConstraintElim] Fix miscompilation caused by PR97974 (#105790) (PR #105797)
dtcxzyw wrote: @nikic Affected rust applications:
- just
- rustfmt
- tree-sitter

See also https://github.com/llvm/llvm-project/pull/105790/files

https://github.com/llvm/llvm-project/pull/105797
[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)
https://github.com/dtcxzyw milestoned https://github.com/llvm/llvm-project/pull/107184
[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/107184 Backport 9fef09fd2918e7d8c357b98a9a798fe207941f73. >From b247a5774de77002b48257e2ce885b7ae34e9faf Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 4 Sep 2024 12:19:46 +0800 Subject: [PATCH] [Clang][CodeGen] Fix type for atomic float incdec operators (#107075) `llvm::ConstantFP::get(llvm::LLVMContext&, APFloat(float))` always returns a f32 constant. Fix https://github.com/llvm/llvm-project/issues/107054. --- clang/lib/CodeGen/CGExprScalar.cpp| 26 +- clang/test/CodeGen/X86/x86-atomic-double.c| 88 +++--- .../test/CodeGen/X86/x86-atomic-long_double.c | 293 ++ 3 files changed, 300 insertions(+), 107 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index a17d68424bbce5..6e212e74676e8d 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2833,18 +2833,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } -// Special case for atomic increment/decrement on floats +// Special case for atomic increment/decrement on floats. +// Bail out non-power-of-2-sized floating point types (e.g., x86_fp80). if (type->isFloatingType()) { - llvm::AtomicRMWInst::BinOp aop = - isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub; - llvm::Instruction::BinaryOps op = - isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub; - llvm::Value *amt = llvm::ConstantFP::get( - VMContext, llvm::APFloat(static_cast(1.0))); - llvm::Value *old = - Builder.CreateAtomicRMW(aop, LV.getAddress(), amt, - llvm::AtomicOrdering::SequentiallyConsistent); - return isPre ? Builder.CreateBinOp(op, old, amt) : old; + llvm::Type *Ty = ConvertType(type); + if (llvm::has_single_bit(Ty->getScalarSizeInBits())) { +llvm::AtomicRMWInst::BinOp aop = +isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub; +llvm::Instruction::BinaryOps op = +isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub; +llvm::Value *amt = llvm::ConstantFP::get(Ty, 1.0); +llvm::AtomicRMWInst *old = Builder.CreateAtomicRMW( +aop, LV.getAddress(), amt, +llvm::AtomicOrdering::SequentiallyConsistent); + +return isPre ? 
Builder.CreateBinOp(op, old, amt) : old; + } } value = EmitLoadOfLValue(LV, E->getExprLoc()); input = value; diff --git a/clang/test/CodeGen/X86/x86-atomic-double.c b/clang/test/CodeGen/X86/x86-atomic-double.c index 2354c89cc2b170..09c8f70c3db854 100644 --- a/clang/test/CodeGen/X86/x86-atomic-double.c +++ b/clang/test/CodeGen/X86/x86-atomic-double.c @@ -6,20 +6,14 @@ // X64-LABEL: define dso_local double @test_double_post_inc( // X64-SAME: ) #[[ATTR0:[0-9]+]] { // X64-NEXT: entry: -// X64-NEXT:[[RETVAL:%.*]] = alloca double, align 8 -// X64-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.00e+00 seq_cst, align 8 -// X64-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 8 -// X64-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 -// X64-NEXT:ret double [[TMP1]] +// X64-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, double 1.00e+00 seq_cst, align 8 +// X64-NEXT:ret double [[TMP0]] // // X86-LABEL: define dso_local double @test_double_post_inc( // X86-SAME: ) #[[ATTR0:[0-9]+]] { // X86-NEXT: entry: -// X86-NEXT:[[RETVAL:%.*]] = alloca double, align 4 -// X86-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.00e+00 seq_cst, align 8 -// X86-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 4 -// X86-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4 -// X86-NEXT:ret double [[TMP1]] +// X86-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, double 1.00e+00 seq_cst, align 8 +// X86-NEXT:ret double [[TMP0]] // double test_double_post_inc() { @@ -30,20 +24,14 @@ double test_double_post_inc() // X64-LABEL: define dso_local double @test_double_post_dc( // X64-SAME: ) #[[ATTR0]] { // X64-NEXT: entry: -// X64-NEXT:[[RETVAL:%.*]] = alloca double, align 8 -// X64-NEXT:[[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.00e+00 seq_cst, align 8 -// X64-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 8 -// X64-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 -// X64-NEXT:ret double [[TMP1]] +// X64-NEXT:[[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, double 1.00e+00 seq_cst, align 8 +// X64-NEXT:ret double [[TMP0]] // // X86-LABEL: define dso_local double @test_double_post_dc( // X86-SAME: ) #[[ATTR0
[llvm-branch-commits] [llvm] release/19.x: [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432) (PR #107945)
dtcxzyw wrote:
> Hi, since we are wrapping up LLVM 19.1.0 we are very strict with the fixes we pick at this point. Can you please respond to the following questions to help me understand if this has to be included in the final release or not.
>
> Is this PR a fix for a regression or a critical issue?

Yes. It fixes a miscompilation reported in https://github.com/llvm/llvm-project/pull/107432.

> What is the risk of accepting this into the release branch?

It introduces a performance regression. I have filed an issue to track this: https://github.com/llvm/llvm-project/issues/107946.

> What is the risk of NOT accepting this into the release branch?

ISel for 32-bit signed div/rem stays broken on LoongArch64.

https://github.com/llvm/llvm-project/pull/107945
[llvm-branch-commits] [llvm] release/19.x: [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432) (PR #107945)
dtcxzyw wrote:
> > It introduces a performance regression. I have filed an issue to track this: #107946.
>
> Is this something you also expect to backport in this case? do we want to wait for this fix to be available before we merge? In that case - would it be better to wait and merge both these changes into a 19.1.x release instead?

@heiher @SixWeining Do you guys have a plan to backport https://github.com/llvm/llvm-project/pull/107971?

https://github.com/llvm/llvm-project/pull/107945
[llvm-branch-commits] [llvm] release/19.x: [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432) (PR #107945)
dtcxzyw wrote:
> In that case - does it make sense to wait for that change before merging this?

See https://github.com/llvm/llvm-project/pull/107990.

https://github.com/llvm/llvm-project/pull/107945
[llvm-branch-commits] [llvm] Backport "[InstCombine] Drop range attributes in `foldIsPowerOf2` (#111946)" (PR #111984)
https://github.com/dtcxzyw edited https://github.com/llvm/llvm-project/pull/111984
[llvm-branch-commits] [llvm] Backport "[InstCombine] Drop range attributes in `foldIsPowerOf2` (#111946)" (PR #111984)
https://github.com/dtcxzyw milestoned https://github.com/llvm/llvm-project/pull/111984
[llvm-branch-commits] [llvm] Backport "[InstCombine] Drop range attributes in `foldIsPowerOf2` (#111946)" (PR #111984)
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/111984 Backport https://github.com/llvm/llvm-project/commit/6a65e98fa7901dc1de91172d065fafb16ce89d77. As https://github.com/llvm/llvm-project/pull/100899 exists in 19.x code base, I guess 19.x is also a vulnerable version. >From aaa8b792a82e29e8e5931a3c7705f623a1bb7e50 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 11 Oct 2024 18:19:21 +0800 Subject: [PATCH] [InstCombine] Drop range attributes in `foldIsPowerOf2` (#111946) Fixes https://github.com/llvm/llvm-project/issues/111934. --- .../InstCombine/InstCombineAndOrXor.cpp | 18 --- llvm/test/Transforms/InstCombine/ispow2.ll| 32 +++ 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index f9caa4da44931a..3222e8298c3f0b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -926,9 +926,11 @@ static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, } /// Reduce a pair of compares that check if a value has exactly 1 bit set. -/// Also used for logical and/or, must be poison safe. +/// Also used for logical and/or, must be poison safe if range attributes are +/// dropped. static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, - InstCombiner::BuilderTy &Builder) { + InstCombiner::BuilderTy &Builder, + InstCombinerImpl &IC) { // Handle 'and' / 'or' commutation: make the equality check the first operand. if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE) std::swap(Cmp0, Cmp1); @@ -942,7 +944,10 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, match(Cmp1, m_ICmp(Pred1, m_Intrinsic(m_Specific(X)), m_SpecificInt(2))) && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) { -Value *CtPop = Cmp1->getOperand(0); +auto *CtPop = cast(Cmp1->getOperand(0)); +// Drop range attributes and re-infer them in the next iteration. +CtPop->dropPoisonGeneratingAnnotations(); +IC.addToWorklist(CtPop); return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1)); } // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1 @@ -950,7 +955,10 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, match(Cmp1, m_ICmp(Pred1, m_Intrinsic(m_Specific(X)), m_SpecificInt(1))) && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) { -Value *CtPop = Cmp1->getOperand(0); +auto *CtPop = cast(Cmp1->getOperand(0)); +// Drop range attributes and re-infer them in the next iteration. 
+CtPop->dropPoisonGeneratingAnnotations(); +IC.addToWorklist(CtPop); return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1)); } return nullptr; @@ -3347,7 +3355,7 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder)) return V; - if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder)) + if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder, *this)) return V; if (Value *V = foldPowerOf2AndShiftedMask(LHS, RHS, IsAnd, Builder)) diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll index a143b1347ccee5..216ccc5c77257b 100644 --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -1522,3 +1522,35 @@ define <2 x i1> @not_pow2_or_z_known_bits_fail_wrong_cmp(<2 x i32> %xin) { %r = icmp ugt <2 x i32> %cnt, ret <2 x i1> %r } + +; Make sure that range attributes on return values are dropped after merging these two icmps + +define i1 @has_single_bit(i32 %x) { +; CHECK-LABEL: @has_single_bit( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[POPCNT:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]]) +; CHECK-NEXT:[[SEL:%.*]] = icmp eq i32 [[POPCNT]], 1 +; CHECK-NEXT:ret i1 [[SEL]] +; +entry: + %cmp1 = icmp ne i32 %x, 0 + %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp2 = icmp ult i32 %popcnt, 2 + %sel = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %sel +} + +define i1 @has_single_bit_inv(i32 %x) { +; CHECK-LABEL: @has_single_bit_inv( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[POPCNT:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]]) +; CHECK-NEXT:[[SEL:%.*]] = icmp ne i32 [[POPCNT]], 1 +; CHECK-NEXT:ret i1 [[SEL]] +; +entry: + %cmp1 = icmp eq i32 %x, 0 + %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp2 = icmp ugt i32 %popcnt, 1 + %sel = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %sel +} __
[llvm-branch-commits] [llvm] [Inliner][Backport] Fix bug where attributes are propagated incorrectly (#109347) (PR #109502)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/109502
[llvm-branch-commits] [llvm] release/19.x: [InstCombine] Handle constant GEP expr in `SimplifyDemandedUseBits` (#116794) (PR #116814)
dtcxzyw wrote: @tstellar We should disable the greeter for PRs created by llvmbot. https://github.com/llvm/llvm-project/pull/116814
[llvm-branch-commits] [llvm] release/19.x: [SCEV] Do not allow refinement in the rewriting of BEValue (#117152) (PR #118216)
https://github.com/dtcxzyw closed https://github.com/llvm/llvm-project/pull/118216
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,53 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::Type *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index, llvm::Type *ValueTy, +CGBuilderTy &Builder, CodeGenModule &CGM) { +llvm::Value *GEPIndices[] = {Builder.getInt32(0), + llvm::ConstantInt::get(Int32Ty, Index)}; +Value *Ptr = Builder.CreateInBoundsGEP(StructTy, RISCVCPUModel, GEPIndices); +Value *CPUID = Builder.CreateAlignedLoad( +ValueTy, Ptr, +CharUnits::fromQuantity(ValueTy->getScalarSizeInBits() / 8)); +return CPUID; + }; + + const llvm::RISCV::CPUModel CPUModel = llvm::RISCV::getCPUModel(CPUStr); + + // Compare mvendorid. + Value *VendorID = loadRISCVCPUID(0, Int32Ty, Builder, CGM); + Value *Result = Builder.CreateICmpEQ( + VendorID, llvm::ConstantInt::get(Int32Ty, CPUModel.MVendorID)); dtcxzyw wrote: ```suggestion VendorID, Builder.getInt32(CPUModel.MVendorID)); ``` https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,53 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::Type *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index, llvm::Type *ValueTy, +CGBuilderTy &Builder, CodeGenModule &CGM) { dtcxzyw wrote: ```suggestion auto loadRISCVCPUID = [&](unsigned Index, llvm::Type *ValueTy) { ``` `Builder` and `CGM` are captured by `[&]`. https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,53 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::Type *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index, llvm::Type *ValueTy, +CGBuilderTy &Builder, CodeGenModule &CGM) { +llvm::Value *GEPIndices[] = {Builder.getInt32(0), + llvm::ConstantInt::get(Int32Ty, Index)}; +Value *Ptr = Builder.CreateInBoundsGEP(StructTy, RISCVCPUModel, GEPIndices); +Value *CPUID = Builder.CreateAlignedLoad( +ValueTy, Ptr, +CharUnits::fromQuantity(ValueTy->getScalarSizeInBits() / 8)); dtcxzyw wrote: ```suggestion ValueTy, Ptr, MayBeAlign{}); ``` It will be automatically set to `DL.getABITypeAlign`. https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,53 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::Type *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index, llvm::Type *ValueTy, +CGBuilderTy &Builder, CodeGenModule &CGM) { +llvm::Value *GEPIndices[] = {Builder.getInt32(0), + llvm::ConstantInt::get(Int32Ty, Index)}; +Value *Ptr = Builder.CreateInBoundsGEP(StructTy, RISCVCPUModel, GEPIndices); dtcxzyw wrote: ```suggestion Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index); ``` https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [SCEV] Do not allow refinement in the rewriting of BEValue (#117152) (PR #118216)
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/118216 Backport https://github.com/llvm/llvm-project/commit/f7ef0721d60f85e1f699f8d1b83d4402ae19b122 >From 6f08a0f1eb21de59f1c9cb2b8c86597d43e23b31 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 1 Dec 2024 22:18:46 +0800 Subject: [PATCH] release/19.x: [SCEV] Do not allow refinement in the rewriting of BEValue (#117152) --- llvm/include/llvm/Analysis/ScalarEvolution.h | 6 ++ llvm/lib/Analysis/ScalarEvolution.cpp | 47 +++--- .../test/Analysis/ScalarEvolution/pr117133.ll | 94 +++ .../Transforms/IndVarSimplify/pr117133.ll | 44 + .../trip-count-expansion-may-introduce-ub.ll | 10 +- 5 files changed, 184 insertions(+), 17 deletions(-) create mode 100644 llvm/test/Analysis/ScalarEvolution/pr117133.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/pr117133.ll diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index d9bfca763819f1..6d64154af59731 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -2132,6 +2132,12 @@ class ScalarEvolution { bool isGuaranteedToTransferExecutionTo(const Instruction *A, const Instruction *B); + /// Returns true if \p Op is guaranteed not to cause immediate UB. + bool isGuaranteedNotToCauseUB(const SCEV *Op); + + /// Returns true if \p Op is guaranteed to not be poison. + static bool isGuaranteedNotToBePoison(const SCEV *Op); + /// Return true if the SCEV corresponding to \p I is never poison. Proving /// this is more complex than proving that just \p I is never poison, since /// SCEV commons expressions across control flow, and you can have cases diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 412cfe73d3e559..cd272b34aab100 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4217,7 +4217,7 @@ bool ScalarEvolution::canReuseInstruction( // Either the value can't be poison, or the S would also be poison if it // is. -if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) +if (PoisonVals.contains(V) || ::isGuaranteedNotToBePoison(V)) continue; auto *I = dyn_cast(V); @@ -4320,6 +4320,8 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, } for (unsigned i = 1, e = Ops.size(); i != e; ++i) { +if (!isGuaranteedNotToCauseUB(Ops[i])) + continue; // We can replace %x umin_seq %y with %x umin %y if either: // * %y being poison implies %x is also poison. // * %x cannot be the saturating value (e.g. zero for umin). @@ -5936,18 +5938,22 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { // We can generalize this saying that i is the shifted value of BEValue // by one iteration: // PHI(f(0), f({1,+,1})) --> f({0,+,1}) -const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); -const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false); -if (Shifted != getCouldNotCompute() && -Start != getCouldNotCompute()) { - const SCEV *StartVal = getSCEV(StartValueV); - if (Start == StartVal) { -// Okay, for the entire analysis of this edge we assumed the PHI -// to be symbolic. We now need to go back and purge all of the -// entries for the scalars that use the symbolic expression. -forgetMemoizedResults(SymbolicName); -insertValueToMap(PN, Shifted); -return Shifted; + +// Do not allow refinement in rewriting of BEValue. 
+if (isGuaranteedNotToCauseUB(BEValue)) { + const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false); + if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute() && + ::impliesPoison(BEValue, Start)) { +const SCEV *StartVal = getSCEV(StartValueV); +if (Start == StartVal) { + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + forgetMemoizedResults(SymbolicName); + insertValueToMap(PN, Shifted); + return Shifted; +} } } } @@ -7319,6 +7325,21 @@ bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A, return false; } +bool ScalarEvolution::isGuaranteedNotToBePoison(const SCEV *Op) { + SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ true); + visitAll(Op, PC); + return PC.MaybePoison.empty(); +} + +bool ScalarEvolution::isGuaranteedNotToCauseUB(const SCEV *Op) { + return !SCEVExprContains(Op, [this](const SCEV *S) { +auto *UDiv = dyn_cast(S); +// The UDiv ma
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Check nowrap flags when folding comparison of GEPs with the same base pointer (#121892) (PR #125858)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/125858
[llvm-branch-commits] [llvm] release/20.x: [SCEV] Check correct value for UB (#124302) (PR #124895)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/124895
[llvm-branch-commits] [llvm] Recommit "[ConstraintElim] Simplify cmp after uadd.sat/usub.sat (#135603)" (PR #136467)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/136467
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
dtcxzyw wrote:

> I don't think there is a need to backport FMF propagation fixes.

Is there a policy for judging whether or not to backport a miscompilation bug fix? Admittedly, this bug is unlikely to be triggered in real-world projects, but the fix is simple and safe to backport. I am fine with not backporting it if the reason is "it depends on https://github.com/llvm/llvm-project/pull/137131".

https://github.com/llvm/llvm-project/pull/137605
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
https://github.com/dtcxzyw commented: We can do this fold in InstSimplify: https://alive2.llvm.org/ce/z/Dm53TP

However, we should wait for the following things before working on more simplifications with `freeze poison`:

1. Remove `freeze poison -> null` canonicalization in InstCombine
2. Replace existing undef handling logic with `freeze poison`

See also https://github.com/llvm/llvm-project/pull/119884#issuecomment-2548042978

https://github.com/llvm/llvm-project/pull/129776
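For readers unfamiliar with the fold being referenced, here is a rough InstSimplify-shaped sketch. It is an assumed shape, not the code from the PR (which performs the fold in InstCombine's `visitFreeze`), and the function name is illustrative: if one arm of a select is a single-use `freeze poison`, the select can be replaced by the other arm, because the frozen value may legally be refined to equal that arm.

```cpp
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hedged sketch: simplify `select %c, %x, (freeze poison)` (or the mirrored
// form) to %x when the frozen poison has no other uses.
static Value *simplifySelectWithFrozenPoisonArm(Value *TrueV, Value *FalseV) {
  auto IsOneUseFrozenPoison = [](Value *V) {
    return V->hasOneUse() && match(V, m_Freeze(m_Poison()));
  };
  if (IsOneUseFrozenPoison(FalseV))
    return TrueV;
  if (IsOneUseFrozenPoison(TrueV))
    return FalseV;
  return nullptr; // no simplification
}
```

The one-use check matters because the frozen value must not be observed anywhere else if we are to pick its value to match the other arm.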
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
@@ -3124,6 +3124,19 @@ inline auto m_c_LogicalOp(const LHS &L, const RHS &R) {
   return m_LogicalOp(L, R);
 }
 
+struct GuaranteedNotToBeUndefOrPoison_match {
+  template <typename ITy> bool match(ITy *V) {
+    if (auto *AsValue = dyn_cast<Value>(V))
+      return isGuaranteedNotToBeUndefOrPoison(AsValue);

dtcxzyw wrote:

I don't like this helper. Context information (e.g., AC/DT/CxtI) that is available in InstCombine is useful for getting a more precise analysis result.

https://github.com/llvm/llvm-project/pull/129776
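To illustrate the concern, the two helpers below (names are illustrative, not from the PR) contrast the context-free query a PatternMatch-style matcher is limited to with the context-aware form InstCombine can use directly.

```cpp
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// What a context-free matcher can do.
static bool noPoisonNoContext(const Value *V) {
  return isGuaranteedNotToBeUndefOrPoison(V);
}

// What InstCombine can do: AC/DT/CtxI let ValueTracking use llvm.assume
// facts and dominating conditions, proving more values non-poison.
static bool noPoisonWithContext(const Value *V, AssumptionCache &AC,
                                const Instruction *CtxI,
                                const DominatorTree &DT) {
  return isGuaranteedNotToBeUndefOrPoison(V, &AC, CtxI, &DT);
}
```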
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
@@ -4813,15 +4813,22 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
   // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
   // duplicating logic for binops at least.
   auto getUndefReplacement = [&I](Type *Ty) {
-    Constant *BestValue = nullptr;
-    Constant *NullValue = Constant::getNullValue(Ty);
+    Value *BestValue = nullptr;
+    Value *NullValue = Constant::getNullValue(Ty);
     for (const auto *U : I.users()) {
-      Constant *C = NullValue;
+      Value *C = NullValue;
       if (match(U, m_Or(m_Value(), m_Value())))
         C = ConstantInt::getAllOnesValue(Ty);
       else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
         C = ConstantInt::getTrue(Ty);
-
+      else if (I.hasOneUse() &&

dtcxzyw wrote:

If `I` has only one use, it must have only one user. I would like to hoist this logic out of the loop:

```
if (I.hasOneUse() &&
    match(I.user_back(), m_c_Select(m_Specific(&I), m_Value(Arm))) &&
    isGuaranteedNotToBeUndefOrPoison(Arm, &AC, &DT, &I))
  return Arm;

// existing code
Constant *BestValue = nullptr;
Constant *NullValue = Constant::getNullValue(Ty);
...
```

https://github.com/llvm/llvm-project/pull/129776
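For reference, a compilable variant of the suggestion above might look like the sketch below. It is not code from the PR: the function name is illustrative, `AC`/`DT` are assumed to be the InstCombiner's AssumptionCache and DominatorTree, the `m_c_Select` shorthand is spelled out as matching the freeze in either select arm, and the argument order follows the ValueTracking signature (value, assumption cache, context instruction, dominator tree).

```cpp
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// If the frozen value I has a single use, and that use is a select with I as
// one arm, return the other arm when it is provably neither undef nor poison.
static Value *selectArmReplacement(FreezeInst &I, AssumptionCache &AC,
                                   DominatorTree &DT) {
  Value *Arm = nullptr;
  if (I.hasOneUse() &&
      (match(I.user_back(), m_Select(m_Value(), m_Specific(&I), m_Value(Arm))) ||
       match(I.user_back(), m_Select(m_Value(), m_Value(Arm), m_Specific(&I)))) &&
      isGuaranteedNotToBeUndefOrPoison(Arm, &AC, &I, &DT))
    return Arm;
  return nullptr;
}
```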
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
dtcxzyw wrote:

> I would like to incorporate this and then once freeze poison -> null canonicalization is removed from InstCombine refactor appropriately. Would this be acceptable?

I don't mean to block this patch. I just worry that these patches may not be well tested (fuzzers / compile-time tracker / llvm-opt-benchmark) until we remove the canonicalization.

https://github.com/llvm/llvm-project/pull/129776
[llvm-branch-commits] [llvm] IR: Remove redundant UseList check in addUse (PR #138676)
https://github.com/dtcxzyw approved this pull request. LG https://github.com/llvm/llvm-project/pull/138676
[llvm-branch-commits] [llvm] IR: Remove redundant UseList check in addUse (PR #138676)
dtcxzyw wrote:

> > Ping?
>
> Depends on the parent, plus was hoping to run this through compile time tracker

CTMark: https://llvm-compile-time-tracker.com/compare.php?from=a861f50030a9dac28a35654506bb28d2bc239b56&to=a33632206ab5e08caf9e243009f5911400441d01&stat=instructions:u

llvm-opt-benchmark: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2340#issuecomment-2868839363

https://github.com/llvm/llvm-project/pull/138676
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Avoid folding select(umin(X, Y), X) with min/max values in false arm (#143020) (PR #144322)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/144322
[llvm-branch-commits] [llvm] [CVP] Keep `ReachableCaseCount` in sync with range of condition (#142302) (PR #142730)
https://github.com/dtcxzyw milestoned https://github.com/llvm/llvm-project/pull/142730
[llvm-branch-commits] [llvm] [CVP] Keep `ReachableCaseCount` in sync with range of condition (#142302) (PR #142730)
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/142730

Backport https://github.com/llvm/llvm-project/commit/0f7cc4132b62e0ecdbd3193e954b745c5f492e90.

https://github.com/llvm/llvm-project/pull/79993 assumes that a reachable case must be contained by `CR`. However, it doesn't hold for some edge cases. This patch adds additional checks to ensure `ReachableCaseCount` is correct.

Note: Similar optimization in SCCP isn't affected by this bug because it uses `CR` to compute `ReachableCaseCount`.

Closes https://github.com/llvm/llvm-project/issues/142286.

>From 9f73052846c60357a38e0259eba1675f9b14b8c7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Mon, 2 Jun 2025 17:42:02 +0800
Subject: [PATCH] [CVP] Keep `ReachableCaseCount` in sync with range of condition (#142302)

https://github.com/llvm/llvm-project/pull/79993 assumes that a reachable case must be contained by `CR`. However, it doesn't hold for some edge cases. This patch adds additional checks to ensure `ReachableCaseCount` is correct.

Note: Similar optimization in SCCP isn't affected by this bug because it uses `CR` to compute `ReachableCaseCount`.

Closes https://github.com/llvm/llvm-project/issues/142286.
---
 .../Scalar/CorrelatedValuePropagation.cpp | 59 +++
 .../CorrelatedValuePropagation/switch.ll  | 36 +++
 2 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 8e74b8645fad9..86c4170b9a977 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -370,15 +370,30 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
   { // Scope for SwitchInstProfUpdateWrapper. It must not live during
     // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
     SwitchInstProfUpdateWrapper SI(*I);
+    ConstantRange CR =
+        LVI->getConstantRangeAtUse(I->getOperandUse(0), /*UndefAllowed=*/false);
     unsigned ReachableCaseCount = 0;
 
     for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
       ConstantInt *Case = CI->getCaseValue();
-      auto *Res = dyn_cast_or_null<ConstantInt>(
-          LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
-                              /* UseBlockValue */ true));
+      std::optional<bool> Predicate = std::nullopt;
+      if (!CR.contains(Case->getValue()))
+        Predicate = false;
+      else if (CR.isSingleElement() &&
+               *CR.getSingleElement() == Case->getValue())
+        Predicate = true;
+      if (!Predicate) {
+        // Handle missing cases, e.g., the range has a hole.
+        auto *Res = dyn_cast_or_null<ConstantInt>(
+            LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
+                                /* UseBlockValue=*/true));
+        if (Res && Res->isZero())
+          Predicate = false;
+        else if (Res && Res->isOne())
+          Predicate = true;
+      }
 
-      if (Res && Res->isZero()) {
+      if (Predicate && !*Predicate) {
         // This case never fires - remove it.
         BasicBlock *Succ = CI->getCaseSuccessor();
         Succ->removePredecessor(BB);
@@ -395,7 +410,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
         DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
         continue;
       }
-      if (Res && Res->isOne()) {
+      if (Predicate && *Predicate) {
         // This case always fires. Arrange for the switch to be turned into an
         // unconditional branch by replacing the switch condition with the case
         // value.
@@ -410,28 +425,24 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
       ++ReachableCaseCount;
     }
 
-    BasicBlock *DefaultDest = SI->getDefaultDest();
-    if (ReachableCaseCount > 1 &&
-        !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())) {
-      ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0),
-                                                    /*UndefAllowed*/ false);
-      // The default dest is unreachable if all cases are covered.
-      if (!CR.isSizeLargerThan(ReachableCaseCount)) {
-        BasicBlock *NewUnreachableBB =
-            BasicBlock::Create(BB->getContext(), "default.unreachable",
-                               BB->getParent(), DefaultDest);
-        new UnreachableInst(BB->getContext(), NewUnreachableBB);
+    // The default dest is unreachable if all cases are covered.
+    if (!SI->defaultDestUndefined() &&
+        !CR.isSizeLargerThan(ReachableCaseCount)) {
+      BasicBlock *DefaultDest = SI->getDefaultDest();
+      BasicBlock *NewUnreachableBB =
+          BasicBlock::Create(BB->getContext(), "default.unreachable",
+                             BB->getParent(), DefaultDest);
+      new UnreachableInst(BB->getContext(), NewUnreachableBB);
 
-      DefaultDest->removePredecessor(BB);
-      SI->setDe
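The remainder of the patch is cut off above. To illustrate the invariant it restores (a hedged sketch with illustrative names, not code from the patch): the reachable-case count and the "default is dead" decision must both be derived from the same `ConstantRange`, otherwise a case that the range already excludes can inflate the count and the default destination can be removed incorrectly.

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/ConstantRange.h"

using namespace llvm;

// Count the switch cases that can actually fire under the condition's range:
// a case value outside CR can never be taken.
static unsigned countReachableCases(const ConstantRange &CR,
                                    ArrayRef<APInt> CaseValues) {
  unsigned Reachable = 0;
  for (const APInt &Case : CaseValues)
    if (CR.contains(Case))
      ++Reachable;
  return Reachable;
}

// The default destination can be marked unreachable when CR has no more
// values than the reachable cases, i.e. every value the condition can take
// is handled by some case.
static bool defaultIsDead(const ConstantRange &CR, unsigned ReachableCases) {
  return !CR.isSizeLargerThan(ReachableCases);
}
```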