https://github.com/badumbatish updated 
https://github.com/llvm/llvm-project/pull/144741

>From b1154b3be42660c7d9d7b6ea59bb6b59a5eacc94 Mon Sep 17 00:00:00 2001
From: badumbatish <tanghocle...@gmail.com>
Date: Wed, 18 Jun 2025 16:38:11 -0700
Subject: [PATCH 01/12] Precommit missed optimization test for #50142

---
 .../WebAssembly/simd-setcc-reductions.ll      | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll

diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
new file mode 100644
index 0000000000000..2cc730e6ff530
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck 
%s
+
+target triple = "wasm64"
+
+define i32 @all_true_16_i8(<16 x i8> %v) {
+; CHECK-LABEL: all_true_16_i8:
+; CHECK:         .functype all_true_16_i8 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0
+; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <16 x i8> %v, zeroinitializer
+  %2 = bitcast <16 x i1> %1 to i16
+  %3 = icmp eq i16 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+define i32 @all_true_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: all_true_4_i32:
+; CHECK:         .functype all_true_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp eq i4 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+define i32 @all_true_8_i16(<8 x i16> %v) {
+; CHECK-LABEL: all_true_8_i16:
+; CHECK:         .functype all_true_8_i16 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i16x8.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <8 x i16> %v, zeroinitializer
+  %2 = bitcast <8 x i1> %1 to i8
+  %3 = icmp eq i8 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+define i32 @all_true_2_i64(<2 x i64> %v) {
+; CHECK-LABEL: all_true_2_i64:
+; CHECK:         .functype all_true_2_i64 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0
+; CHECK-NEXT:    i64x2.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <2 x i64> %v, zeroinitializer
+  %2 = bitcast <2 x i1> %1 to i2
+  %3 = icmp eq i2 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}

>From 5f5f74002df3350307a86d7fd537aa47f0dd5ea9 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Thu, 19 Jun 2025 15:31:21 -0700
Subject: [PATCH 02/12] Fix issue 50142 by adding AnyTrueCombine

This introduces the fold of (any_true (setcc <X>, 0, eq)) to (not
(all_true <X>)), allowing a potential further fold of (not (not ...)).

Updates test simd-setcc-reductions and readjusts simd-vecreduce-bool.
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 41 ++++++++++++++++++-
 .../WebAssembly/simd-setcc-reductions.ll      | 40 ++++--------------
 .../WebAssembly/simd-vecreduce-bool.ll        |  6 +--
 3 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index ec77154d17caa..6165bff626516 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3239,6 +3239,42 @@ static SDValue performBitcastCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
+  // any_true (setcc <X>, 0, eq)
+  // => not (all_true X)
+
+  SDLoc DL(N);
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
+  if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
+    return SDValue();
+
+  SDValue SetCC = N->getOperand(1);
+  if (SetCC.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  SDValue LHS = SetCC->getOperand(0);
+  SDValue RHS = SetCC->getOperand(1);
+  ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  EVT LT = LHS.getValueType();
+  unsigned NumElts = LT.getVectorNumElements();
+  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  EVT Width = MVT::getIntegerVT(128 / NumElts);
+
+  if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
+    return SDValue();
+
+  SDValue Ret = DAG.getZExtOrTrunc(
+      DAG.getNode(
+          ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+          {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
+           DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
+      DL, MVT::i1);
+  Ret = DAG.getNOT(DL, Ret, MVT::i1);
+  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+}
+
 template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
           Intrinsic::ID Intrin>
 static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
@@ -3427,8 +3463,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
     return performVectorTruncZeroCombine(N, DCI);
   case ISD::TRUNCATE:
     return performTruncateCombine(N, DCI);
-  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN: {
+    if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
+      return AnyTrueCombine;
     return performLowerPartialReduction(N, DCI.DAG);
+  }
   case ISD::MUL:
     return performMulCombine(N, DCI.DAG);
   }
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 2cc730e6ff530..1d0a688216765 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) {
 ; CHECK-LABEL: all_true_16_i8:
 ; CHECK:         .functype all_true_16_i8 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0
-; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i8x16.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <16 x i8> %v, zeroinitializer
   %2 = bitcast <16 x i1> %1 to i16
   %3 = icmp eq i16 %2, 0
@@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: all_true_4_i32:
 ; CHECK:         .functype all_true_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i32x4.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp eq i4 %2, 0
@@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) {
 ; CHECK-LABEL: all_true_8_i16:
 ; CHECK:         .functype all_true_8_i16 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT:    i16x8.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i16x8.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <8 x i16> %v, zeroinitializer
   %2 = bitcast <8 x i1> %1 to i8
   %3 = icmp eq i8 %2, 0
@@ -67,14 +49,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
 ; CHECK-LABEL: all_true_2_i64:
 ; CHECK:         .functype all_true_2_i64 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0
-; CHECK-NEXT:    i64x2.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i64x2.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <2 x i64> %v, zeroinitializer
   %2 = bitcast <2 x i1> %1 to i2
   %3 = icmp eq i2 %2, 0
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll 
b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index e6497bca98dc2..f7143711394fa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) {
 ; CHECK-LABEL: test_cmp_v16i8:
 ; CHECK:         .functype test_cmp_v16i8 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0
-; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i8x16.all_true $push0=, $0
+; CHECK-NEXT:    i32.const $push1=, 1
+; CHECK-NEXT:    i32.xor $push2=, $pop0, $pop1
 ; CHECK-NEXT:    return $pop2
   %zero = icmp eq <16 x i8> %x, zeroinitializer
   %ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero)

>From a093e7e2200c90b1b1b421c30902009b10df2c2e Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Fri, 20 Jun 2025 09:59:18 -0700
Subject: [PATCH 03/12] Use SDPatternMatching and remove truncate...

Use SDPatternMatch and remove the truncation. Also add a 4 x i64 test
case to reflect that.
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  1 +
 .../WebAssembly/simd-setcc-reductions.ll      | 23 +++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6165bff626516..df539d65cf51c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/DiagnosticInfo.h"
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 1d0a688216765..c6a387c022f22 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -57,3 +57,26 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
   %conv3 = zext i1 %3 to i32
   ret i32 %conv3
 }
+
+
+define i32 @all_true_4_i64(<4 x i64> %v) {
+; CHECK-LABEL: all_true_4_i64:
+; CHECK:         .functype all_true_4_i64 (v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push9=, 0, 0
+; CHECK-NEXT:    local.tee $push8=, $2=, $pop9
+; CHECK-NEXT:    i64x2.eq $push1=, $0, $pop8
+; CHECK-NEXT:    i64x2.eq $push0=, $1, $2
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 1, 2, 3, 8, 9, 10, 
11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    v128.any_true $push3=, $pop2
+; CHECK-NEXT:    i32.const $push4=, -1
+; CHECK-NEXT:    i32.xor $push5=, $pop3, $pop4
+; CHECK-NEXT:    i32.const $push6=, 1
+; CHECK-NEXT:    i32.and $push7=, $pop5, $pop6
+; CHECK-NEXT:    return $pop7
+  %1 = icmp eq <4 x i64> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp eq i4 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}

>From 9a31f41a497e03924c8f94333bebace86ea959f6 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Thu, 26 Jun 2025 10:03:40 -0700
Subject: [PATCH 04/12] Precommit test to add 3 more any/all true patterns

---
 .../WebAssembly/simd-setcc-reductions.ll      | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index c6a387c022f22..469d2dfc2e26a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -80,3 +80,67 @@ define i32 @all_true_4_i64(<4 x i64> %v) {
   %conv3 = zext i1 %3 to i32
   ret i32 %conv3
 }
+
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), 0, ne
+;   => any_true (set_cc (X), 0, ne)
+;   => any_true (X)
+define i32 @any_true_1_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: any_true_1_4_i32:
+; CHECK:         .functype any_true_1_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    return $pop2
+  %1 = icmp ne <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp ne i4 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne
+;   => not all_true (set_cc (X), 0, eq)
+;   => not all_true (set_cc (X), 0, eq)
+;   => not not any_true (X)
+;   => any_true (X)
+define i32 @any_true_2_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: any_true_2_4_i32:
+; CHECK:         .functype any_true_2_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
+; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp ne i4 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), -1, eq
+;   => all_true (set_cc (X), 0, ne)
+;   => all_true (X)
+define i32 @all_true_2_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: all_true_2_4_i32:
+; CHECK:         .functype all_true_2_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
+; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
+; CHECK-NEXT:    return $pop2
+  %1 = icmp ne <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp eq i4 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+

>From 6cbded9a16aef354f906d00a1e7f87d39ff54226 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Thu, 26 Jun 2025 12:50:53 -0700
Subject: [PATCH 05/12] [WebAssembly] Add 3 more optimizations for any/all

all_true (setcc x, 0, eq) -> not any_true
any_true (setcc x, 0, ne) -> any_true
all_true (setcc x, 0, ne) -> all_true
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 73 +++++++++++--------
 .../WebAssembly/simd-setcc-reductions.ll      | 23 ++----
 2 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index df539d65cf51c..a339fbb7b25f7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3240,40 +3240,53 @@ static SDValue performBitcastCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
-  // any_true (setcc <X>, 0, eq)
-  // => not (all_true X)
-
-  SDLoc DL(N);
+static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
+  // any_true (setcc <X>, 0, eq) => (not (all_true X))
+  // all_true (setcc <X>, 0, eq) => (not (any_true X))
+  // any_true (setcc <X>, 0, ne) => (any_true X)
+  // all_true (setcc <X>, 0, ne) => (all_true X)
   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
-  if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
-    return SDValue();
+  using namespace llvm::SDPatternMatch;
+  SDLoc DL(N);
+  auto CombineSetCC =
+      [&N, &DAG, &DL](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType,
+                      Intrinsic::WASMIntrinsics InPost) -> SDValue {
+    if (N->getConstantOperandVal(0) != InPre)
+      return SDValue();
 
-  SDValue SetCC = N->getOperand(1);
-  if (SetCC.getOpcode() != ISD::SETCC)
-    return SDValue();
+    SDValue LHS;
+    if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
+                                              m_SpecificCondCode(SetType))))
+      return SDValue();
 
-  SDValue LHS = SetCC->getOperand(0);
-  SDValue RHS = SetCC->getOperand(1);
-  ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
-  EVT LT = LHS.getValueType();
-  unsigned NumElts = LT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
-    return SDValue();
+    EVT LT = LHS.getValueType();
+    unsigned NumElts = LT.getVectorNumElements();
+    if (LT.getScalarSizeInBits() > 128 / NumElts)
+      return SDValue();
 
-  EVT Width = MVT::getIntegerVT(128 / NumElts);
+    SDValue Ret = DAG.getZExtOrTrunc(
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+                    {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
+        DL, MVT::i1);
+    if (SetType == ISD::SETEQ)
+      Ret = DAG.getNOT(DL, Ret, MVT::i1);
+    return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+  };
 
-  if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
-    return SDValue();
+  if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
+                                       Intrinsic::wasm_alltrue))
+    return AnyTrueEQ;
+  if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
+                                       Intrinsic::wasm_anytrue))
+    return AllTrueEQ;
+  if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
+                                       Intrinsic::wasm_anytrue))
+    return AnyTrueNE;
+  if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
+                                       Intrinsic::wasm_alltrue))
+    return AllTrueNE;
 
-  SDValue Ret = DAG.getZExtOrTrunc(
-      DAG.getNode(
-          ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
-          {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
-           DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
-      DL, MVT::i1);
-  Ret = DAG.getNOT(DL, Ret, MVT::i1);
-  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+  return SDValue();
 }
 
 template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
@@ -3465,8 +3478,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::TRUNCATE:
     return performTruncateCombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: {
-    if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
-      return AnyTrueCombine;
+    if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
+      return AnyAllCombine;
     return performLowerPartialReduction(N, DCI.DAG);
   }
   case ISD::MUL:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 469d2dfc2e26a..172ff53bfb458 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -89,10 +89,8 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: any_true_1_4_i32:
 ; CHECK:         .functype any_true_1_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    return $pop2
+; CHECK-NEXT:    v128.any_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp ne <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp ne i4 %2, 0
@@ -102,21 +100,14 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) {
 
 ; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne
 ;   => not all_true (set_cc (X), 0, eq)
-;   => not all_true (set_cc (X), 0, eq)
 ;   => not not any_true (X)
 ;   => any_true (X)
 define i32 @any_true_2_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: any_true_2_4_i32:
 ; CHECK:         .functype any_true_2_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
-; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    v128.any_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp ne i4 %2, -1
@@ -132,10 +123,8 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: all_true_2_4_i32:
 ; CHECK:         .functype all_true_2_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
-; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
-; CHECK-NEXT:    return $pop2
+; CHECK-NEXT:    i32x4.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp ne <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp eq i4 %2, -1

>From ba84d0c8d762f093c6ef6d5ef5a446a42a8548a5 Mon Sep 17 00:00:00 2001
From: Erick Velez <erickvel...@gmail.com>
Date: Mon, 30 Jun 2025 11:47:09 -0700
Subject: [PATCH 06/12] [clang-doc] Precommit friends test (#146164)

---
 .../test/clang-doc/json/class.cpp             | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/clang-tools-extra/test/clang-doc/json/class.cpp 
b/clang-tools-extra/test/clang-doc/json/class.cpp
index bd82b8159e2f9..0715fcefbb785 100644
--- a/clang-tools-extra/test/clang-doc/json/class.cpp
+++ b/clang-tools-extra/test/clang-doc/json/class.cpp
@@ -23,6 +23,9 @@ struct MyClass {
   typedef int MyTypedef;
   
   class NestedClass;
+  
+  friend struct Foo;
+  template<typename T> friend void friendFunction(int);
 protected:
   int protectedMethod();
 
@@ -86,6 +89,44 @@ struct MyClass {
 // CHECK-NEXT:        "USR": "{{[0-9A-F]*}}"
 // CHECK-NEXT:      }
 // CHECK-NEXT:    ],
+// CHECK-NOT:     "Friends": [
+// CHECK-NOT:       {
+// CHECK-NOT:         "IsClass": false,
+// CHECK-NOT:         "Params": [
+// CHECK-NOT:           {
+// CHECK-NOT:             "Name": "",
+// CHECK-NOT:             "Type": "int"
+// CHECK-NOT:           }
+// CHECK-NOT:         ],
+// CHECK-NOT:         "Reference": {
+// CHECK-NOT:           "Name": "friendFunction",
+// CHECK-NOT:           "Path": "",
+// CHECK-NOT:           "QualName": "friendFunction",
+// CHECK-NOT:           "USR": "{{[0-9A-F]*}}"
+// CHECK-NOT:         },
+// CHECK-NOT:         "ReturnType": {
+// CHECK-NOT:           "IsBuiltIn": true,
+// CHECK-NOT:           "IsTemplate": false,
+// CHECK-NOT:           "Name": "void",
+// CHECK-NOT:           "QualName": "void",
+// CHECK-NOT:           "USR": "0000000000000000000000000000000000000000"
+// CHECK-NOT:         },
+// CHECK-NOT:         "Template": {
+// CHECK-NOT:           "Parameters": [
+// CHECK-NOT:             "typename T"
+// CHECK-NOT:           ]
+// CHECK-NOT:         }
+// CHECK-NOT:       },
+// CHECK-NOT:       {
+// CHECK-NOT:         "IsClass": true,
+// CHECK-NOT:         "Reference": {
+// CHECK-NOT:           "Name": "Foo",
+// CHECK-NOT:           "Path": "GlobalNamespace",
+// CHECK-NOT:           "QualName": "Foo",
+// CHECK-NOT:           "USR": "{{[0-9A-F]*}}"
+// CHECK-NOT:         },
+// CHECK-NOT:       },
+// CHECK-NOT:    ],
 // COM:           FIXME: FullName is not emitted correctly.
 // CHECK-NEXT:    "FullName": "",
 // CHECK-NEXT:    "IsTypedef": false,

>From 8d963b75c7a67d46c72512f955d73878183f5753 Mon Sep 17 00:00:00 2001
From: badumbatish <tanghocle...@gmail.com>
Date: Wed, 18 Jun 2025 16:38:11 -0700
Subject: [PATCH 07/12] Precommit missed optimization test for #50142

---
 .../WebAssembly/simd-setcc-reductions.ll      | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll

diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
new file mode 100644
index 0000000000000..2cc730e6ff530
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck 
%s
+
+target triple = "wasm64"
+
+define i32 @all_true_16_i8(<16 x i8> %v) {
+; CHECK-LABEL: all_true_16_i8:
+; CHECK:         .functype all_true_16_i8 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0
+; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <16 x i8> %v, zeroinitializer
+  %2 = bitcast <16 x i1> %1 to i16
+  %3 = icmp eq i16 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+define i32 @all_true_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: all_true_4_i32:
+; CHECK:         .functype all_true_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp eq i4 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+define i32 @all_true_8_i16(<8 x i16> %v) {
+; CHECK-LABEL: all_true_8_i16:
+; CHECK:         .functype all_true_8_i16 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i16x8.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <8 x i16> %v, zeroinitializer
+  %2 = bitcast <8 x i1> %1 to i8
+  %3 = icmp eq i8 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+define i32 @all_true_2_i64(<2 x i64> %v) {
+; CHECK-LABEL: all_true_2_i64:
+; CHECK:         .functype all_true_2_i64 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0
+; CHECK-NEXT:    i64x2.eq $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <2 x i64> %v, zeroinitializer
+  %2 = bitcast <2 x i1> %1 to i2
+  %3 = icmp eq i2 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}

>From 441523569172b2a062d45e17398fba15169cecc9 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Thu, 19 Jun 2025 15:31:21 -0700
Subject: [PATCH 08/12] Fix issue 50142 by adding AnyTrueCombine

This introduces the fold of (any_true (setcc <X>, 0, eq)) to (not
(all_true <X>)), allowing a potential further fold of (not (not ...)).

Updates test simd-setcc-reductions and readjusts simd-vecreduce-bool.
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 41 ++++++++++++++++++-
 .../WebAssembly/simd-setcc-reductions.ll      | 40 ++++--------------
 .../WebAssembly/simd-vecreduce-bool.ll        |  6 +--
 3 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index ec77154d17caa..6165bff626516 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3239,6 +3239,42 @@ static SDValue performBitcastCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
+  // any_true (setcc <X>, 0, eq)
+  // => not (all_true X)
+
+  SDLoc DL(N);
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
+  if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
+    return SDValue();
+
+  SDValue SetCC = N->getOperand(1);
+  if (SetCC.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  SDValue LHS = SetCC->getOperand(0);
+  SDValue RHS = SetCC->getOperand(1);
+  ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  EVT LT = LHS.getValueType();
+  unsigned NumElts = LT.getVectorNumElements();
+  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  EVT Width = MVT::getIntegerVT(128 / NumElts);
+
+  if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
+    return SDValue();
+
+  SDValue Ret = DAG.getZExtOrTrunc(
+      DAG.getNode(
+          ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+          {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
+           DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
+      DL, MVT::i1);
+  Ret = DAG.getNOT(DL, Ret, MVT::i1);
+  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+}
+
 template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
           Intrinsic::ID Intrin>
 static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
@@ -3427,8 +3463,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
     return performVectorTruncZeroCombine(N, DCI);
   case ISD::TRUNCATE:
     return performTruncateCombine(N, DCI);
-  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN: {
+    if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
+      return AnyTrueCombine;
     return performLowerPartialReduction(N, DCI.DAG);
+  }
   case ISD::MUL:
     return performMulCombine(N, DCI.DAG);
   }
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll 
b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 2cc730e6ff530..1d0a688216765 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) {
 ; CHECK-LABEL: all_true_16_i8:
 ; CHECK:         .functype all_true_16_i8 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i8x16.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <16 x i8> %v, zeroinitializer
   %2 = bitcast <16 x i1> %1 to i16
   %3 = icmp eq i16 %2, 0
@@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: all_true_4_i32:
 ; CHECK:         .functype all_true_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i32x4.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp eq i4 %2, 0
@@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) {
 ; CHECK-LABEL: all_true_8_i16:
 ; CHECK:         .functype all_true_8_i16 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT:    i16x8.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i16x8.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <8 x i16> %v, zeroinitializer
   %2 = bitcast <8 x i1> %1 to i8
   %3 = icmp eq i8 %2, 0
@@ -67,14 +49,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
 ; CHECK-LABEL: all_true_2_i64:
 ; CHECK:         .functype all_true_2_i64 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0
-; CHECK-NEXT:    i64x2.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    i64x2.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <2 x i64> %v, zeroinitializer
   %2 = bitcast <2 x i1> %1 to i2
   %3 = icmp eq i2 %2, 0
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index e6497bca98dc2..f7143711394fa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) {
 ; CHECK-LABEL: test_cmp_v16i8:
 ; CHECK:         .functype test_cmp_v16i8 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    i8x16.all_true $push0=, $0
+; CHECK-NEXT:    i32.const $push1=, 1
+; CHECK-NEXT:    i32.xor $push2=, $pop0, $pop1
 ; CHECK-NEXT:    return $pop2
   %zero = icmp eq <16 x i8> %x, zeroinitializer
   %ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero)

>From dd1bc6714606785ed90b8886471cd9e1fb8b7221 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Fri, 20 Jun 2025 09:59:18 -0700
Subject: [PATCH 09/12] Use SDPatternMatching and remove truncate...

Use SDPatternMatch and remove the truncation. Also add a <4 x i64> test case
to reflect that.
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  1 +
 .../WebAssembly/simd-setcc-reductions.ll      | 23 +++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6165bff626516..df539d65cf51c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/DiagnosticInfo.h"
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 1d0a688216765..c6a387c022f22 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -57,3 +57,26 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
   %conv3 = zext i1 %3 to i32
   ret i32 %conv3
 }
+
+
+define i32 @all_true_4_i64(<4 x i64> %v) {
+; CHECK-LABEL: all_true_4_i64:
+; CHECK:         .functype all_true_4_i64 (v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push9=, 0, 0
+; CHECK-NEXT:    local.tee $push8=, $2=, $pop9
+; CHECK-NEXT:    i64x2.eq $push1=, $0, $pop8
+; CHECK-NEXT:    i64x2.eq $push0=, $1, $2
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    v128.any_true $push3=, $pop2
+; CHECK-NEXT:    i32.const $push4=, -1
+; CHECK-NEXT:    i32.xor $push5=, $pop3, $pop4
+; CHECK-NEXT:    i32.const $push6=, 1
+; CHECK-NEXT:    i32.and $push7=, $pop5, $pop6
+; CHECK-NEXT:    return $pop7
+  %1 = icmp eq <4 x i64> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp eq i4 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}

>From c389843b655a7dd16ba5580f0c40db85be866ff5 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Thu, 26 Jun 2025 10:03:40 -0700
Subject: [PATCH 10/12] Precommit test to add 3 more any/all true patterns

---
 .../WebAssembly/simd-setcc-reductions.ll      | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index c6a387c022f22..469d2dfc2e26a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -80,3 +80,67 @@ define i32 @all_true_4_i64(<4 x i64> %v) {
   %conv3 = zext i1 %3 to i32
   ret i32 %conv3
 }
+
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), 0, ne
+;   => any_true (set_cc (X), 0, ne)
+;   => any_true (X)
+define i32 @any_true_1_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: any_true_1_4_i32:
+; CHECK:         .functype any_true_1_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
+; CHECK-NEXT:    v128.any_true $push2=, $pop1
+; CHECK-NEXT:    return $pop2
+  %1 = icmp ne <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp ne i4 %2, 0
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne
+;   => not all_true (set_cc (X), 0, eq)
+;   => not all_true (set_cc (X), 0, eq)
+;   => not not any_true (X)
+;   => any_true (X)
+define i32 @any_true_2_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: any_true_2_4_i32:
+; CHECK:         .functype any_true_2_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
+; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
+; CHECK-NEXT:    i32.const $push3=, -1
+; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.const $push5=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT:    return $pop6
+  %1 = icmp eq <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp ne i4 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), -1, eq
+;   => all_true (set_cc (X), 0, ne)
+;   => all_true (X)
+define i32 @all_true_2_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: all_true_2_4_i32:
+; CHECK:         .functype all_true_2_4_i32 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
+; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
+; CHECK-NEXT:    return $pop2
+  %1 = icmp ne <4 x i32> %v, zeroinitializer
+  %2 = bitcast <4 x i1> %1 to i4
+  %3 = icmp eq i4 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+

>From 3feb8d0364e1df48f163e6bb996c4f8d48cc7191 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasm...@igalia.com>
Date: Thu, 26 Jun 2025 12:50:53 -0700
Subject: [PATCH 11/12] [WebAssembly] Add 3 more optimizations for any/all

all_true (setcc x, 0, eq) -> not (any_true x)
any_true (setcc x, 0, ne) -> any_true x
all_true (setcc x, 0, ne) -> all_true x
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 73 +++++++++++--------
 .../WebAssembly/simd-setcc-reductions.ll      | 23 ++----
 2 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index df539d65cf51c..a339fbb7b25f7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3240,40 +3240,53 @@ static SDValue performBitcastCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
-  // any_true (setcc <X>, 0, eq)
-  // => not (all_true X)
-
-  SDLoc DL(N);
+static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
+  // any_true (setcc <X>, 0, eq) => (not (all_true X))
+  // all_true (setcc <X>, 0, eq) => (not (any_true X))
+  // any_true (setcc <X>, 0, ne) => (any_true X)
+  // all_true (setcc <X>, 0, ne) => (all_true X)
   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
-  if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
-    return SDValue();
+  using namespace llvm::SDPatternMatch;
+  SDLoc DL(N);
+  auto CombineSetCC =
+      [&N, &DAG, &DL](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType,
+                      Intrinsic::WASMIntrinsics InPost) -> SDValue {
+    if (N->getConstantOperandVal(0) != InPre)
+      return SDValue();
 
-  SDValue SetCC = N->getOperand(1);
-  if (SetCC.getOpcode() != ISD::SETCC)
-    return SDValue();
+    SDValue LHS;
+    if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
+                                              m_SpecificCondCode(SetType))))
+      return SDValue();
 
-  SDValue LHS = SetCC->getOperand(0);
-  SDValue RHS = SetCC->getOperand(1);
-  ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
-  EVT LT = LHS.getValueType();
-  unsigned NumElts = LT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
-    return SDValue();
+    EVT LT = LHS.getValueType();
+    unsigned NumElts = LT.getVectorNumElements();
+    if (LT.getScalarSizeInBits() > 128 / NumElts)
+      return SDValue();
 
-  EVT Width = MVT::getIntegerVT(128 / NumElts);
+    SDValue Ret = DAG.getZExtOrTrunc(
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+                    {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
+        DL, MVT::i1);
+    if (SetType == ISD::SETEQ)
+      Ret = DAG.getNOT(DL, Ret, MVT::i1);
+    return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+  };
 
-  if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
-    return SDValue();
+  if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
+                                       Intrinsic::wasm_alltrue))
+    return AnyTrueEQ;
+  if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
+                                       Intrinsic::wasm_anytrue))
+    return AllTrueEQ;
+  if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
+                                       Intrinsic::wasm_anytrue))
+    return AnyTrueNE;
+  if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
+                                       Intrinsic::wasm_alltrue))
+    return AllTrueNE;
 
-  SDValue Ret = DAG.getZExtOrTrunc(
-      DAG.getNode(
-          ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
-          {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
-           DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
-      DL, MVT::i1);
-  Ret = DAG.getNOT(DL, Ret, MVT::i1);
-  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+  return SDValue();
 }
 
 template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
@@ -3465,8 +3478,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::TRUNCATE:
     return performTruncateCombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: {
-    if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
-      return AnyTrueCombine;
+    if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
+      return AnyAllCombine;
     return performLowerPartialReduction(N, DCI.DAG);
   }
   case ISD::MUL:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 469d2dfc2e26a..172ff53bfb458 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -89,10 +89,8 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: any_true_1_4_i32:
 ; CHECK:         .functype any_true_1_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
-; CHECK-NEXT:    v128.any_true $push2=, $pop1
-; CHECK-NEXT:    return $pop2
+; CHECK-NEXT:    v128.any_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp ne <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp ne i4 %2, 0
@@ -102,21 +100,14 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) {
 
 ; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne
 ;   => not all_true (set_cc (X), 0, eq)
-;   => not all_true (set_cc (X), 0, eq)
 ;   => not not any_true (X)
 ;   => any_true (X)
 define i32 @any_true_2_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: any_true_2_4_i32:
 ; CHECK:         .functype any_true_2_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.eq $push1=, $0, $pop0
-; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
-; CHECK-NEXT:    i32.const $push3=, -1
-; CHECK-NEXT:    i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.const $push5=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT:    return $pop6
+; CHECK-NEXT:    v128.any_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp eq <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp ne i4 %2, -1
@@ -132,10 +123,8 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
 ; CHECK-LABEL: all_true_2_4_i32:
 ; CHECK:         .functype all_true_2_4_i32 (v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT:    i32x4.ne $push1=, $0, $pop0
-; CHECK-NEXT:    i32x4.all_true $push2=, $pop1
-; CHECK-NEXT:    return $pop2
+; CHECK-NEXT:    i32x4.all_true $push0=, $0
+; CHECK-NEXT:    return $pop0
   %1 = icmp ne <4 x i32> %v, zeroinitializer
   %2 = bitcast <4 x i1> %1 to i4
   %3 = icmp eq i4 %2, -1

>From 8cafed8c559adf115a788a0866a2046c7c78194b Mon Sep 17 00:00:00 2001
From: badumbatish <--show-origin>
Date: Tue, 1 Jul 2025 13:48:16 -0700
Subject: [PATCH 12/12] [WebAssembly] Fix nit in PR 144741

---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index a339fbb7b25f7..55ff3c4534b5b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3247,10 +3247,18 @@ static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
   // all_true (setcc <X>, 0, ne) => (all_true X)
   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
   using namespace llvm::SDPatternMatch;
-  SDLoc DL(N);
-  auto CombineSetCC =
-      [&N, &DAG, &DL](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType,
-                      Intrinsic::WASMIntrinsics InPost) -> SDValue {
+
+  SDValue LHS;
+  if (!sd_match(N->getOperand(1),
+                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
+    return SDValue();
+  EVT LT = LHS.getValueType();
+  if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
+    return SDValue();
+
+  auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
+                                 ISD::CondCode SetType,
+                                 Intrinsic::WASMIntrinsics InPost) {
     if (N->getConstantOperandVal(0) != InPre)
       return SDValue();
 
@@ -3259,11 +3267,7 @@ static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
                                               m_SpecificCondCode(SetType))))
       return SDValue();
 
-    EVT LT = LHS.getValueType();
-    unsigned NumElts = LT.getVectorNumElements();
-    if (LT.getScalarSizeInBits() > 128 / NumElts)
-      return SDValue();
-
+    SDLoc DL(N);
     SDValue Ret = DAG.getZExtOrTrunc(
         DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                     {DAG.getConstant(InPost, DL, MVT::i32), LHS}),

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to