https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/144741
>From b1154b3be42660c7d9d7b6ea59bb6b59a5eacc94 Mon Sep 17 00:00:00 2001 From: badumbatish <tanghocle...@gmail.com> Date: Wed, 18 Jun 2025 16:38:11 -0700 Subject: [PATCH 01/12] Precommit missed optimization test for #50142 --- .../WebAssembly/simd-setcc-reductions.ll | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll new file mode 100644 index 0000000000000..2cc730e6ff530 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +target triple = "wasm64" + +define i32 @all_true_16_i8(<16 x i8> %v) { +; CHECK-LABEL: all_true_16_i8: +; CHECK: .functype all_true_16_i8 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <16 x i8> %v, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + %3 = icmp eq i16 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +define i32 @all_true_4_i32(<4 x i32> %v) { +; CHECK-LABEL: all_true_4_i32: +; CHECK: .functype all_true_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; 
CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp eq i4 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +define i32 @all_true_8_i16(<8 x i16> %v) { +; CHECK-LABEL: all_true_8_i16: +; CHECK: .functype all_true_8_i16 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <8 x i16> %v, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + %3 = icmp eq i8 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +define i32 @all_true_2_i64(<2 x i64> %v) { +; CHECK-LABEL: all_true_2_i64: +; CHECK: .functype all_true_2_i64 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0 +; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <2 x i64> %v, zeroinitializer + %2 = bitcast <2 x i1> %1 to i2 + %3 = icmp eq i2 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} >From 5f5f74002df3350307a86d7fd537aa47f0dd5ea9 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Thu, 19 Jun 2025 15:31:21 -0700 Subject: [PATCH 02/12] Fix issue 50142 by adding AnyTrueCombine This introduces the fold (any_true (setcc <X> 0, eq)) to (not (all_true)), allowing potential extra fold of (not (not ...)) Introduces test simd-setcc-reductions and readjusts simd-vecreduce-bool --- .../WebAssembly/WebAssemblyISelLowering.cpp | 41 
++++++++++++++++++- .../WebAssembly/simd-setcc-reductions.ll | 40 ++++-------------- .../WebAssembly/simd-vecreduce-bool.ll | 6 +-- 3 files changed, 51 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index ec77154d17caa..6165bff626516 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3239,6 +3239,42 @@ static SDValue performBitcastCombine(SDNode *N, return SDValue(); } +static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) { + // any_true (setcc <X>, 0, eq) + // => not (all_true X) + + SDLoc DL(N); + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); + if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue) + return SDValue(); + + SDValue SetCC = N->getOperand(1); + if (SetCC.getOpcode() != ISD::SETCC) + return SDValue(); + + SDValue LHS = SetCC->getOperand(0); + SDValue RHS = SetCC->getOperand(1); + ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); + EVT LT = LHS.getValueType(); + unsigned NumElts = LT.getVectorNumElements(); + if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + return SDValue(); + + EVT Width = MVT::getIntegerVT(128 / NumElts); + + if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ) + return SDValue(); + + SDValue Ret = DAG.getZExtOrTrunc( + DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), + DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}), + DL, MVT::i1); + Ret = DAG.getNOT(DL, Ret, MVT::i1); + return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); +} + template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate, Intrinsic::ID Intrin> static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { @@ -3427,8 +3463,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, return 
performVectorTruncZeroCombine(N, DCI); case ISD::TRUNCATE: return performTruncateCombine(N, DCI); - case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: { + if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG)) + return AnyTrueCombine; return performLowerPartialReduction(N, DCI.DAG); + } case ISD::MUL: return performMulCombine(N, DCI.DAG); } diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 2cc730e6ff530..1d0a688216765 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) { ; CHECK-LABEL: all_true_16_i8: ; CHECK: .functype all_true_16_i8 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i8x16.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <16 x i8> %v, zeroinitializer %2 = bitcast <16 x i1> %1 to i16 %3 = icmp eq i16 %2, 0 @@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) { ; CHECK-LABEL: all_true_4_i32: ; CHECK: .functype all_true_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i32x4.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp eq i4 %2, 0 
@@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) { ; CHECK-LABEL: all_true_8_i16: ; CHECK: .functype all_true_8_i16 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i16x8.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <8 x i16> %v, zeroinitializer %2 = bitcast <8 x i1> %1 to i8 %3 = icmp eq i8 %2, 0 @@ -67,14 +49,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) { ; CHECK-LABEL: all_true_2_i64: ; CHECK: .functype all_true_2_i64 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0 -; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i64x2.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <2 x i64> %v, zeroinitializer %2 = bitcast <2 x i1> %1 to i2 %3 = icmp eq i2 %2, 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll index e6497bca98dc2..f7143711394fa 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll @@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) { ; CHECK-LABEL: test_cmp_v16i8: ; CHECK: .functype test_cmp_v16i8 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i8x16.all_true $push0=, $0 +; CHECK-NEXT: 
i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 ; CHECK-NEXT: return $pop2 %zero = icmp eq <16 x i8> %x, zeroinitializer %ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero) >From a093e7e2200c90b1b1b421c30902009b10df2c2e Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Fri, 20 Jun 2025 09:59:18 -0700 Subject: [PATCH 03/12] Use SDPatternMatching and remove truncate... Use SDPatternMatching and remove truncation. Also added 4xi64 case to reflect that. --- .../WebAssembly/WebAssemblyISelLowering.cpp | 1 + .../WebAssembly/simd-setcc-reductions.ll | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 6165bff626516..df539d65cf51c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/DiagnosticInfo.h" diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 1d0a688216765..c6a387c022f22 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -57,3 +57,26 @@ define i32 @all_true_2_i64(<2 x i64> %v) { %conv3 = zext i1 %3 to i32 ret i32 %conv3 } + + +define i32 @all_true_4_i64(<4 x i64> %v) { +; CHECK-LABEL: all_true_4_i64: +; CHECK: .functype all_true_4_i64 (v128, v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push9=, 0, 0 +; CHECK-NEXT: local.tee $push8=, $2=, $pop9 +; CHECK-NEXT: i64x2.eq $push1=, $0, $pop8 +; CHECK-NEXT: i64x2.eq $push0=, $1, $2 +; 
CHECK-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: v128.any_true $push3=, $pop2 +; CHECK-NEXT: i32.const $push4=, -1 +; CHECK-NEXT: i32.xor $push5=, $pop3, $pop4 +; CHECK-NEXT: i32.const $push6=, 1 +; CHECK-NEXT: i32.and $push7=, $pop5, $pop6 +; CHECK-NEXT: return $pop7 + %1 = icmp eq <4 x i64> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp eq i4 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} >From 9a31f41a497e03924c8f94333bebace86ea959f6 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Thu, 26 Jun 2025 10:03:40 -0700 Subject: [PATCH 04/12] Precommit test to add 3 more any/all true patterns --- .../WebAssembly/simd-setcc-reductions.ll | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index c6a387c022f22..469d2dfc2e26a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -80,3 +80,67 @@ define i32 @all_true_4_i64(<4 x i64> %v) { %conv3 = zext i1 %3 to i32 ret i32 %conv3 } + + +; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), 0, ne +; => any_true (set_cc (X), 0, ne) +; => any_true (X) +define i32 @any_true_1_4_i32(<4 x i32> %v) { +; CHECK-LABEL: any_true_1_4_i32: +; CHECK: .functype any_true_1_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %1 = icmp ne <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp ne i4 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + +; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne +; => not all_true (set_cc (X), 0, eq) +; => not all_true (set_cc (X), 0, eq) +; => not not any_true (X) +; => any_true (X) +define i32 
@any_true_2_4_i32(<4 x i32> %v) { +; CHECK-LABEL: any_true_2_4_i32: +; CHECK: .functype any_true_2_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 +; CHECK-NEXT: i32x4.all_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp ne i4 %2, -1 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), -1, eq +; => all_true (set_cc (X), 0, ne) +; => all_true (X) +define i32 @all_true_2_4_i32(<4 x i32> %v) { +; CHECK-LABEL: all_true_2_4_i32: +; CHECK: .functype all_true_2_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 +; CHECK-NEXT: i32x4.all_true $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %1 = icmp ne <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp eq i4 %2, -1 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + >From 6cbded9a16aef354f906d00a1e7f87d39ff54226 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Thu, 26 Jun 2025 12:50:53 -0700 Subject: [PATCH 05/12] [WebAssembly] Add 3 more optimization for any/all all_true (setcc x, 0, eq) -> not any_true any_true (setcc x, 0, ne) -> any_true all_true (setcc x, 0, ne) -> all_true --- .../WebAssembly/WebAssemblyISelLowering.cpp | 73 +++++++++++-------- .../WebAssembly/simd-setcc-reductions.ll | 23 ++---- 2 files changed, 49 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index df539d65cf51c..a339fbb7b25f7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3240,40 +3240,53 @@ static SDValue performBitcastCombine(SDNode *N, return SDValue(); } -static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) { - // any_true (setcc <X>, 0, eq) - // => not (all_true X) - - SDLoc DL(N); +static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) { + // any_true (setcc <X>, 0, eq) => (not (all_true X)) + // all_true (setcc <X>, 0, eq) => (not (any_true X)) + // any_true (setcc <X>, 0, ne) => (any_true X) + // all_true (setcc <X>, 0, ne) => (all_true X) assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); - if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue) - return SDValue(); + using namespace llvm::SDPatternMatch; + SDLoc DL(N); + auto CombineSetCC = + [&N, &DAG, &DL](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType, + Intrinsic::WASMIntrinsics InPost) -> SDValue { + if (N->getConstantOperandVal(0) != InPre) + return SDValue(); - SDValue SetCC = N->getOperand(1); - if (SetCC.getOpcode() != ISD::SETCC) - return SDValue(); + SDValue LHS; + if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(), + m_SpecificCondCode(SetType)))) + return SDValue(); - SDValue LHS = SetCC->getOperand(0); - SDValue RHS = SetCC->getOperand(1); - ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); - EVT LT = LHS.getValueType(); - unsigned NumElts = LT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) - return SDValue(); + EVT LT = LHS.getValueType(); + unsigned NumElts = LT.getVectorNumElements(); + if (LT.getScalarSizeInBits() > 128 / NumElts) + return SDValue(); - EVT Width = MVT::getIntegerVT(128 / NumElts); + SDValue Ret = DAG.getZExtOrTrunc( + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(InPost, DL, MVT::i32), LHS}), + DL, MVT::i1); + if (SetType == ISD::SETEQ) + Ret = DAG.getNOT(DL, Ret, MVT::i1); + return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); + }; - 
if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ) - return SDValue(); + if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ, + Intrinsic::wasm_alltrue)) + return AnyTrueEQ; + if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ, + Intrinsic::wasm_anytrue)) + return AllTrueEQ; + if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE, + Intrinsic::wasm_anytrue)) + return AnyTrueNE; + if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE, + Intrinsic::wasm_alltrue)) + return AllTrueNE; - SDValue Ret = DAG.getZExtOrTrunc( - DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), - DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}), - DL, MVT::i1); - Ret = DAG.getNOT(DL, Ret, MVT::i1); - return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); + return SDValue(); } template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate, @@ -3465,8 +3478,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::TRUNCATE: return performTruncateCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: { - if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG)) - return AnyTrueCombine; + if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG)) + return AnyAllCombine; return performLowerPartialReduction(N, DCI.DAG); } case ISD::MUL: diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 469d2dfc2e26a..172ff53bfb458 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -89,10 +89,8 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) { ; CHECK-LABEL: any_true_1_4_i32: ; CHECK: .functype any_true_1_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; 
CHECK-NEXT: return $pop2 +; CHECK-NEXT: v128.any_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp ne <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp ne i4 %2, 0 @@ -102,21 +100,14 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) { ; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne ; => not all_true (set_cc (X), 0, eq) -; => not all_true (set_cc (X), 0, eq) ; => not not any_true (X) ; => any_true (X) define i32 @any_true_2_4_i32(<4 x i32> %v) { ; CHECK-LABEL: any_true_2_4_i32: ; CHECK: .functype any_true_2_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 -; CHECK-NEXT: i32x4.all_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: v128.any_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp ne i4 %2, -1 @@ -132,10 +123,8 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) { ; CHECK-LABEL: all_true_2_4_i32: ; CHECK: .functype all_true_2_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 -; CHECK-NEXT: i32x4.all_true $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: i32x4.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp ne <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp eq i4 %2, -1 >From ba84d0c8d762f093c6ef6d5ef5a446a42a8548a5 Mon Sep 17 00:00:00 2001 From: Erick Velez <erickvel...@gmail.com> Date: Mon, 30 Jun 2025 11:47:09 -0700 Subject: [PATCH 06/12] [clang-doc] Precommit friends test (#146164) --- .../test/clang-doc/json/class.cpp | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/clang-tools-extra/test/clang-doc/json/class.cpp 
b/clang-tools-extra/test/clang-doc/json/class.cpp index bd82b8159e2f9..0715fcefbb785 100644 --- a/clang-tools-extra/test/clang-doc/json/class.cpp +++ b/clang-tools-extra/test/clang-doc/json/class.cpp @@ -23,6 +23,9 @@ struct MyClass { typedef int MyTypedef; class NestedClass; + + friend struct Foo; + template<typename T> friend void friendFunction(int); protected: int protectedMethod(); @@ -86,6 +89,44 @@ struct MyClass { // CHECK-NEXT: "USR": "{{[0-9A-F]*}}" // CHECK-NEXT: } // CHECK-NEXT: ], +// CHECK-NOT: "Friends": [ +// CHECK-NOT: { +// CHECK-NOT: "IsClass": false, +// CHECK-NOT: "Params": [ +// CHECK-NOT: { +// CHECK-NOT: "Name": "", +// CHECK-NOT: "Type": "int" +// CHECK-NOT: } +// CHECK-NOT: ], +// CHECK-NOT: "Reference": { +// CHECK-NOT: "Name": "friendFunction", +// CHECK-NOT: "Path": "", +// CHECK-NOT: "QualName": "friendFunction", +// CHECK-NOT: "USR": "{{[0-9A-F]*}}" +// CHECK-NOT: }, +// CHECK-NOT: "ReturnType": { +// CHECK-NOT: "IsBuiltIn": true, +// CHECK-NOT: "IsTemplate": false, +// CHECK-NOT: "Name": "void", +// CHECK-NOT: "QualName": "void", +// CHECK-NOT: "USR": "0000000000000000000000000000000000000000" +// CHECK-NOT: }, +// CHECK-NOT: "Template": { +// CHECK-NOT: "Parameters": [ +// CHECK-NOT: "typename T" +// CHECK-NOT: ] +// CHECK-NOT: } +// CHECK-NOT: }, +// CHECK-NOT: { +// CHECK-NOT: "IsClass": true, +// CHECK-NOT: "Reference": { +// CHECK-NOT: "Name": "Foo", +// CHECK-NOT: "Path": "GlobalNamespace", +// CHECK-NOT: "QualName": "Foo", +// CHECK-NOT: "USR": "{{[0-9A-F]*}}" +// CHECK-NOT: }, +// CHECK-NOT: }, +// CHECK-NOT: ], // COM: FIXME: FullName is not emitted correctly. 
// CHECK-NEXT: "FullName": "", // CHECK-NEXT: "IsTypedef": false, >From 8d963b75c7a67d46c72512f955d73878183f5753 Mon Sep 17 00:00:00 2001 From: badumbatish <tanghocle...@gmail.com> Date: Wed, 18 Jun 2025 16:38:11 -0700 Subject: [PATCH 07/12] Precommit missed optimization test for #50142 --- .../WebAssembly/simd-setcc-reductions.ll | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll new file mode 100644 index 0000000000000..2cc730e6ff530 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +target triple = "wasm64" + +define i32 @all_true_16_i8(<16 x i8> %v) { +; CHECK-LABEL: all_true_16_i8: +; CHECK: .functype all_true_16_i8 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <16 x i8> %v, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + %3 = icmp eq i16 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +define i32 @all_true_4_i32(<4 x i32> %v) { +; CHECK-LABEL: all_true_4_i32: +; CHECK: .functype all_true_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: 
i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp eq i4 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +define i32 @all_true_8_i16(<8 x i16> %v) { +; CHECK-LABEL: all_true_8_i16: +; CHECK: .functype all_true_8_i16 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <8 x i16> %v, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + %3 = icmp eq i8 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +define i32 @all_true_2_i64(<2 x i64> %v) { +; CHECK-LABEL: all_true_2_i64: +; CHECK: .functype all_true_2_i64 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0 +; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <2 x i64> %v, zeroinitializer + %2 = bitcast <2 x i1> %1 to i2 + %3 = icmp eq i2 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} >From 441523569172b2a062d45e17398fba15169cecc9 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Thu, 19 Jun 2025 15:31:21 -0700 Subject: [PATCH 08/12] Fix issue 50142 by adding AnyTrueCombine This introduces the fold (any_true (setcc <X> 0, eq)) to (not (all_true)), allowing potential extra fold of (not (not ...)) Introduces test simd-setcc-reductions and readjusts 
simd-vecreduce-bool --- .../WebAssembly/WebAssemblyISelLowering.cpp | 41 ++++++++++++++++++- .../WebAssembly/simd-setcc-reductions.ll | 40 ++++-------------- .../WebAssembly/simd-vecreduce-bool.ll | 6 +-- 3 files changed, 51 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index ec77154d17caa..6165bff626516 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3239,6 +3239,42 @@ static SDValue performBitcastCombine(SDNode *N, return SDValue(); } +static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) { + // any_true (setcc <X>, 0, eq) + // => not (all_true X) + + SDLoc DL(N); + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); + if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue) + return SDValue(); + + SDValue SetCC = N->getOperand(1); + if (SetCC.getOpcode() != ISD::SETCC) + return SDValue(); + + SDValue LHS = SetCC->getOperand(0); + SDValue RHS = SetCC->getOperand(1); + ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); + EVT LT = LHS.getValueType(); + unsigned NumElts = LT.getVectorNumElements(); + if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + return SDValue(); + + EVT Width = MVT::getIntegerVT(128 / NumElts); + + if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ) + return SDValue(); + + SDValue Ret = DAG.getZExtOrTrunc( + DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), + DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}), + DL, MVT::i1); + Ret = DAG.getNOT(DL, Ret, MVT::i1); + return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); +} + template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate, Intrinsic::ID Intrin> static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { @@ -3427,8 +3463,11 @@ 
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, return performVectorTruncZeroCombine(N, DCI); case ISD::TRUNCATE: return performTruncateCombine(N, DCI); - case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: { + if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG)) + return AnyTrueCombine; return performLowerPartialReduction(N, DCI.DAG); + } case ISD::MUL: return performMulCombine(N, DCI.DAG); } diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 2cc730e6ff530..1d0a688216765 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) { ; CHECK-LABEL: all_true_16_i8: ; CHECK: .functype all_true_16_i8 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i8x16.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <16 x i8> %v, zeroinitializer %2 = bitcast <16 x i1> %1 to i16 %3 = icmp eq i16 %2, 0 @@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) { ; CHECK-LABEL: all_true_4_i32: ; CHECK: .functype all_true_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i32x4.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <4 x i32> %v, 
zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp eq i4 %2, 0 @@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) { ; CHECK-LABEL: all_true_8_i16: ; CHECK: .functype all_true_8_i16 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i16x8.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <8 x i16> %v, zeroinitializer %2 = bitcast <8 x i1> %1 to i8 %3 = icmp eq i8 %2, 0 @@ -67,14 +49,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) { ; CHECK-LABEL: all_true_2_i64: ; CHECK: .functype all_true_2_i64 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0 -; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i64x2.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <2 x i64> %v, zeroinitializer %2 = bitcast <2 x i1> %1 to i2 %3 = icmp eq i2 %2, 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll index e6497bca98dc2..f7143711394fa 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll @@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) { ; CHECK-LABEL: test_cmp_v16i8: ; CHECK: .functype test_cmp_v16i8 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, 
$pop1 +; CHECK-NEXT: i8x16.all_true $push0=, $0 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 ; CHECK-NEXT: return $pop2 %zero = icmp eq <16 x i8> %x, zeroinitializer %ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero) >From dd1bc6714606785ed90b8886471cd9e1fb8b7221 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Fri, 20 Jun 2025 09:59:18 -0700 Subject: [PATCH 09/12] Use SDPatternMatching and remove truncate... Use SDPatternMatching and remove truncation. Also added 4xi64 case to reflect that. --- .../WebAssembly/WebAssemblyISelLowering.cpp | 1 + .../WebAssembly/simd-setcc-reductions.ll | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 6165bff626516..df539d65cf51c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/DiagnosticInfo.h" diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 1d0a688216765..c6a387c022f22 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -57,3 +57,26 @@ define i32 @all_true_2_i64(<2 x i64> %v) { %conv3 = zext i1 %3 to i32 ret i32 %conv3 } + + +define i32 @all_true_4_i64(<4 x i64> %v) { +; CHECK-LABEL: all_true_4_i64: +; CHECK: .functype all_true_4_i64 (v128, v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push9=, 0, 0 +; CHECK-NEXT: local.tee $push8=, $2=, $pop9 +; CHECK-NEXT: i64x2.eq 
$push1=, $0, $pop8 +; CHECK-NEXT: i64x2.eq $push0=, $1, $2 +; CHECK-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: v128.any_true $push3=, $pop2 +; CHECK-NEXT: i32.const $push4=, -1 +; CHECK-NEXT: i32.xor $push5=, $pop3, $pop4 +; CHECK-NEXT: i32.const $push6=, 1 +; CHECK-NEXT: i32.and $push7=, $pop5, $pop6 +; CHECK-NEXT: return $pop7 + %1 = icmp eq <4 x i64> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp eq i4 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} >From c389843b655a7dd16ba5580f0c40db85be866ff5 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Thu, 26 Jun 2025 10:03:40 -0700 Subject: [PATCH 10/12] Precommit test to add 3 more any/all true patterns --- .../WebAssembly/simd-setcc-reductions.ll | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index c6a387c022f22..469d2dfc2e26a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -80,3 +80,67 @@ define i32 @all_true_4_i64(<4 x i64> %v) { %conv3 = zext i1 %3 to i32 ret i32 %conv3 } + + +; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), 0, ne +; => any_true (set_cc (X), 0, ne) +; => any_true (X) +define i32 @any_true_1_4_i32(<4 x i32> %v) { +; CHECK-LABEL: any_true_1_4_i32: +; CHECK: .functype any_true_1_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 +; CHECK-NEXT: v128.any_true $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %1 = icmp ne <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp ne i4 %2, 0 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + +; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne +; => not all_true (set_cc (X), 0, eq) +; => not all_true (set_cc (X), 0, eq) +; => 
not not any_true (X) +; => any_true (X) +define i32 @any_true_2_4_i32(<4 x i32> %v) { +; CHECK-LABEL: any_true_2_4_i32: +; CHECK: .functype any_true_2_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 +; CHECK-NEXT: i32x4.all_true $push2=, $pop1 +; CHECK-NEXT: i32.const $push3=, -1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 +; CHECK-NEXT: return $pop6 + %1 = icmp eq <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp ne i4 %2, -1 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + +; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), -1, eq +; => all_true (set_cc (X), 0, ne) +; => all_true (X) +define i32 @all_true_2_4_i32(<4 x i32> %v) { +; CHECK-LABEL: all_true_2_4_i32: +; CHECK: .functype all_true_2_4_i32 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 +; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 +; CHECK-NEXT: i32x4.all_true $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %1 = icmp ne <4 x i32> %v, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = icmp eq i4 %2, -1 + %conv3 = zext i1 %3 to i32 + ret i32 %conv3 +} + + >From 3feb8d0364e1df48f163e6bb996c4f8d48cc7191 Mon Sep 17 00:00:00 2001 From: badumbatish <jjasm...@igalia.com> Date: Thu, 26 Jun 2025 12:50:53 -0700 Subject: [PATCH 11/12] [WebAssembly] Add 3 more optimizations for any/all: all_true (setcc x, 0, eq) -> not any_true; any_true (setcc x, 0, ne) -> any_true; all_true (setcc x, 0, ne) -> all_true --- .../WebAssembly/WebAssemblyISelLowering.cpp | 73 +++++++++++-------- .../WebAssembly/simd-setcc-reductions.ll | 23 ++---- 2 files changed, 49 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index df539d65cf51c..a339fbb7b25f7 100644 --- 
a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3240,40 +3240,53 @@ static SDValue performBitcastCombine(SDNode *N, return SDValue(); } -static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) { - // any_true (setcc <X>, 0, eq) - // => not (all_true X) - - SDLoc DL(N); +static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) { + // any_true (setcc <X>, 0, eq) => (not (all_true X)) + // all_true (setcc <X>, 0, eq) => (not (any_true X)) + // any_true (setcc <X>, 0, ne) => (any_true X) + // all_true (setcc <X>, 0, ne) => (all_true X) assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); - if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue) - return SDValue(); + using namespace llvm::SDPatternMatch; + SDLoc DL(N); + auto CombineSetCC = + [&N, &DAG, &DL](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType, + Intrinsic::WASMIntrinsics InPost) -> SDValue { + if (N->getConstantOperandVal(0) != InPre) + return SDValue(); - SDValue SetCC = N->getOperand(1); - if (SetCC.getOpcode() != ISD::SETCC) - return SDValue(); + SDValue LHS; + if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(), + m_SpecificCondCode(SetType)))) + return SDValue(); - SDValue LHS = SetCC->getOperand(0); - SDValue RHS = SetCC->getOperand(1); - ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); - EVT LT = LHS.getValueType(); - unsigned NumElts = LT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) - return SDValue(); + EVT LT = LHS.getValueType(); + unsigned NumElts = LT.getVectorNumElements(); + if (LT.getScalarSizeInBits() > 128 / NumElts) + return SDValue(); - EVT Width = MVT::getIntegerVT(128 / NumElts); + SDValue Ret = DAG.getZExtOrTrunc( + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(InPost, DL, MVT::i32), LHS}), + DL, MVT::i1); + if (SetType == ISD::SETEQ) + Ret = DAG.getNOT(DL, Ret, MVT::i1); + 
return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); + }; - if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ) - return SDValue(); + if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ, + Intrinsic::wasm_alltrue)) + return AnyTrueEQ; + if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ, + Intrinsic::wasm_anytrue)) + return AllTrueEQ; + if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE, + Intrinsic::wasm_anytrue)) + return AnyTrueNE; + if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE, + Intrinsic::wasm_alltrue)) + return AllTrueNE; - SDValue Ret = DAG.getZExtOrTrunc( - DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), - DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}), - DL, MVT::i1); - Ret = DAG.getNOT(DL, Ret, MVT::i1); - return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); + return SDValue(); } template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate, @@ -3465,8 +3478,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::TRUNCATE: return performTruncateCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: { - if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG)) - return AnyTrueCombine; + if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG)) + return AnyAllCombine; return performLowerPartialReduction(N, DCI.DAG); } case ISD::MUL: diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 469d2dfc2e26a..172ff53bfb458 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -89,10 +89,8 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) { ; CHECK-LABEL: any_true_1_4_i32: ; CHECK: .functype any_true_1_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.ne 
$push1=, $0, $pop0 -; CHECK-NEXT: v128.any_true $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: v128.any_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp ne <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp ne i4 %2, 0 @@ -102,21 +100,14 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) { ; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne ; => not all_true (set_cc (X), 0, eq) -; => not all_true (set_cc (X), 0, eq) ; => not not any_true (X) ; => any_true (X) define i32 @any_true_2_4_i32(<4 x i32> %v) { ; CHECK-LABEL: any_true_2_4_i32: ; CHECK: .functype any_true_2_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0 -; CHECK-NEXT: i32x4.all_true $push2=, $pop1 -; CHECK-NEXT: i32.const $push3=, -1 -; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.const $push5=, 1 -; CHECK-NEXT: i32.and $push6=, $pop4, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: v128.any_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp eq <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp ne i4 %2, -1 @@ -132,10 +123,8 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) { ; CHECK-LABEL: all_true_2_4_i32: ; CHECK: .functype all_true_2_4_i32 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0 -; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0 -; CHECK-NEXT: i32x4.all_true $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: i32x4.all_true $push0=, $0 +; CHECK-NEXT: return $pop0 %1 = icmp ne <4 x i32> %v, zeroinitializer %2 = bitcast <4 x i1> %1 to i4 %3 = icmp eq i4 %2, -1 >From 8cafed8c559adf115a788a0866a2046c7c78194b Mon Sep 17 00:00:00 2001 From: badumbatish <--show-origin> Date: Tue, 1 Jul 2025 13:48:16 -0700 Subject: [PATCH 12/12] [WebAssembly] Fix nit in PR 144741 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git 
a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a339fbb7b25f7..55ff3c4534b5b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3247,10 +3247,18 @@ static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) { // all_true (setcc <X>, 0, ne) => (all_true X) assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); using namespace llvm::SDPatternMatch; - SDLoc DL(N); - auto CombineSetCC = - [&N, &DAG, &DL](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType, - Intrinsic::WASMIntrinsics InPost) -> SDValue { + + SDValue LHS; + if (!sd_match(N->getOperand(1), + m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode()))) + return SDValue(); + EVT LT = LHS.getValueType(); + if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements()) + return SDValue(); + + auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre, + ISD::CondCode SetType, + Intrinsic::WASMIntrinsics InPost) { if (N->getConstantOperandVal(0) != InPre) return SDValue(); @@ -3259,11 +3267,7 @@ static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) { m_SpecificCondCode(SetType)))) return SDValue(); - EVT LT = LHS.getValueType(); - unsigned NumElts = LT.getVectorNumElements(); - if (LT.getScalarSizeInBits() > 128 / NumElts) - return SDValue(); - + SDLoc DL(N); SDValue Ret = DAG.getZExtOrTrunc( DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, {DAG.getConstant(InPost, DL, MVT::i32), LHS}), _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits