[llvm] [WebAssembly] Combine any_true (setcc x, 0, eq) to not all_true (PR #144741)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 12:54:33 PDT 2025
https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/144741
>From b1154b3be42660c7d9d7b6ea59bb6b59a5eacc94 Mon Sep 17 00:00:00 2001
From: badumbatish <tanghocle456 at gmail.com>
Date: Wed, 18 Jun 2025 16:38:11 -0700
Subject: [PATCH 1/5] Precommit missed optimization test for #50142
---
.../WebAssembly/simd-setcc-reductions.ll | 83 +++++++++++++++++++
1 file changed, 83 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
new file mode 100644
index 0000000000000..2cc730e6ff530
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm64"
+
+define i32 @all_true_16_i8(<16 x i8> %v) {
+; CHECK-LABEL: all_true_16_i8:
+; CHECK: .functype all_true_16_i8 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
+; CHECK-NEXT: v128.any_true $push2=, $pop1
+; CHECK-NEXT: i32.const $push3=, -1
+; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT: i32.const $push5=, 1
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT: return $pop6
+ %1 = icmp eq <16 x i8> %v, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ %3 = icmp eq i16 %2, 0
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
+
+
+define i32 @all_true_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: all_true_4_i32:
+; CHECK: .functype all_true_4_i32 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
+; CHECK-NEXT: v128.any_true $push2=, $pop1
+; CHECK-NEXT: i32.const $push3=, -1
+; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT: i32.const $push5=, 1
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT: return $pop6
+ %1 = icmp eq <4 x i32> %v, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = icmp eq i4 %2, 0
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
+
+
+define i32 @all_true_8_i16(<8 x i16> %v) {
+; CHECK-LABEL: all_true_8_i16:
+; CHECK: .functype all_true_8_i16 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0
+; CHECK-NEXT: v128.any_true $push2=, $pop1
+; CHECK-NEXT: i32.const $push3=, -1
+; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT: i32.const $push5=, 1
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT: return $pop6
+ %1 = icmp eq <8 x i16> %v, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ %3 = icmp eq i8 %2, 0
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
+
+
+define i32 @all_true_2_i64(<2 x i64> %v) {
+; CHECK-LABEL: all_true_2_i64:
+; CHECK: .functype all_true_2_i64 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0
+; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0
+; CHECK-NEXT: v128.any_true $push2=, $pop1
+; CHECK-NEXT: i32.const $push3=, -1
+; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT: i32.const $push5=, 1
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT: return $pop6
+ %1 = icmp eq <2 x i64> %v, zeroinitializer
+ %2 = bitcast <2 x i1> %1 to i2
+ %3 = icmp eq i2 %2, 0
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
>From 5f5f74002df3350307a86d7fd537aa47f0dd5ea9 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Thu, 19 Jun 2025 15:31:21 -0700
Subject: [PATCH 2/5] Fix issue 50142 by adding AnyTrueCombine
This introduces the fold (any_true (setcc <X>, 0, eq)) => (not (all_true <X>)),
which also exposes further folds of (not (not ...)).
Adds the test simd-setcc-reductions.ll and updates simd-vecreduce-bool.ll.
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 41 ++++++++++++++++++-
.../WebAssembly/simd-setcc-reductions.ll | 40 ++++--------------
.../WebAssembly/simd-vecreduce-bool.ll | 6 +--
3 files changed, 51 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index ec77154d17caa..6165bff626516 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3239,6 +3239,42 @@ static SDValue performBitcastCombine(SDNode *N,
return SDValue();
}
+static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
+ // any_true (setcc <X>, 0, eq)
+ // => not (all_true X)
+
+ SDLoc DL(N);
+ assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
+ if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
+ return SDValue();
+
+ SDValue SetCC = N->getOperand(1);
+ if (SetCC.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ SDValue LHS = SetCC->getOperand(0);
+ SDValue RHS = SetCC->getOperand(1);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ EVT LT = LHS.getValueType();
+ unsigned NumElts = LT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ EVT Width = MVT::getIntegerVT(128 / NumElts);
+
+ if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
+ return SDValue();
+
+ SDValue Ret = DAG.getZExtOrTrunc(
+ DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
+ DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
+ DL, MVT::i1);
+ Ret = DAG.getNOT(DL, Ret, MVT::i1);
+ return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+}
+
template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
Intrinsic::ID Intrin>
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
@@ -3427,8 +3463,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
return performVectorTruncZeroCombine(N, DCI);
case ISD::TRUNCATE:
return performTruncateCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN: {
+ if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
+ return AnyTrueCombine;
return performLowerPartialReduction(N, DCI.DAG);
+ }
case ISD::MUL:
return performMulCombine(N, DCI.DAG);
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 2cc730e6ff530..1d0a688216765 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) {
; CHECK-LABEL: all_true_16_i8:
; CHECK: .functype all_true_16_i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
-; CHECK-NEXT: v128.any_true $push2=, $pop1
-; CHECK-NEXT: i32.const $push3=, -1
-; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT: i32.const $push5=, 1
-; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT: return $pop6
+; CHECK-NEXT: i8x16.all_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp eq <16 x i8> %v, zeroinitializer
%2 = bitcast <16 x i1> %1 to i16
%3 = icmp eq i16 %2, 0
@@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) {
; CHECK-LABEL: all_true_4_i32:
; CHECK: .functype all_true_4_i32 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
-; CHECK-NEXT: v128.any_true $push2=, $pop1
-; CHECK-NEXT: i32.const $push3=, -1
-; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT: i32.const $push5=, 1
-; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT: return $pop6
+; CHECK-NEXT: i32x4.all_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp eq <4 x i32> %v, zeroinitializer
%2 = bitcast <4 x i1> %1 to i4
%3 = icmp eq i4 %2, 0
@@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) {
; CHECK-LABEL: all_true_8_i16:
; CHECK: .functype all_true_8_i16 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0
-; CHECK-NEXT: v128.any_true $push2=, $pop1
-; CHECK-NEXT: i32.const $push3=, -1
-; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT: i32.const $push5=, 1
-; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT: return $pop6
+; CHECK-NEXT: i16x8.all_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp eq <8 x i16> %v, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp eq i8 %2, 0
@@ -67,14 +49,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
; CHECK-LABEL: all_true_2_i64:
; CHECK: .functype all_true_2_i64 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0
-; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0
-; CHECK-NEXT: v128.any_true $push2=, $pop1
-; CHECK-NEXT: i32.const $push3=, -1
-; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT: i32.const $push5=, 1
-; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT: return $pop6
+; CHECK-NEXT: i64x2.all_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp eq <2 x i64> %v, zeroinitializer
%2 = bitcast <2 x i1> %1 to i2
%3 = icmp eq i2 %2, 0
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index e6497bca98dc2..f7143711394fa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) {
; CHECK-LABEL: test_cmp_v16i8:
; CHECK: .functype test_cmp_v16i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
-; CHECK-NEXT: v128.any_true $push2=, $pop1
+; CHECK-NEXT: i8x16.all_true $push0=, $0
+; CHECK-NEXT: i32.const $push1=, 1
+; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1
; CHECK-NEXT: return $pop2
%zero = icmp eq <16 x i8> %x, zeroinitializer
%ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero)
>From a093e7e2200c90b1b1b421c30902009b10df2c2e Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Fri, 20 Jun 2025 09:59:18 -0700
Subject: [PATCH 3/5] Use SDPatternMatch and remove truncate...
Use SDPatternMatch and remove the truncation. Also add a 4xi64 test case to
reflect that.
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 1 +
.../WebAssembly/simd-setcc-reductions.ll | 23 +++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6165bff626516..df539d65cf51c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DiagnosticInfo.h"
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 1d0a688216765..c6a387c022f22 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -57,3 +57,26 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
%conv3 = zext i1 %3 to i32
ret i32 %conv3
}
+
+
+define i32 @all_true_4_i64(<4 x i64> %v) {
+; CHECK-LABEL: all_true_4_i64:
+; CHECK: .functype all_true_4_i64 (v128, v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push9=, 0, 0
+; CHECK-NEXT: local.tee $push8=, $2=, $pop9
+; CHECK-NEXT: i64x2.eq $push1=, $0, $pop8
+; CHECK-NEXT: i64x2.eq $push0=, $1, $2
+; CHECK-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: v128.any_true $push3=, $pop2
+; CHECK-NEXT: i32.const $push4=, -1
+; CHECK-NEXT: i32.xor $push5=, $pop3, $pop4
+; CHECK-NEXT: i32.const $push6=, 1
+; CHECK-NEXT: i32.and $push7=, $pop5, $pop6
+; CHECK-NEXT: return $pop7
+ %1 = icmp eq <4 x i64> %v, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = icmp eq i4 %2, 0
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
>From 9a31f41a497e03924c8f94333bebace86ea959f6 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Thu, 26 Jun 2025 10:03:40 -0700
Subject: [PATCH 4/5] Precommit test to add 3 more any/all true patterns
---
.../WebAssembly/simd-setcc-reductions.ll | 64 +++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index c6a387c022f22..469d2dfc2e26a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -80,3 +80,67 @@ define i32 @all_true_4_i64(<4 x i64> %v) {
%conv3 = zext i1 %3 to i32
ret i32 %conv3
}
+
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), 0, ne
+; => any_true (set_cc (X), 0, ne)
+; => any_true (X)
+define i32 @any_true_1_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: any_true_1_4_i32:
+; CHECK: .functype any_true_1_4_i32 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0
+; CHECK-NEXT: v128.any_true $push2=, $pop1
+; CHECK-NEXT: return $pop2
+ %1 = icmp ne <4 x i32> %v, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = icmp ne i4 %2, 0
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne
+; => not all_true (set_cc (X), 0, eq)
+; (since all_true (set_cc (X), 0, eq) == not any_true (X))
+; => not not any_true (X)
+; => any_true (X)
+define i32 @any_true_2_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: any_true_2_4_i32:
+; CHECK: .functype any_true_2_4_i32 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
+; CHECK-NEXT: i32x4.all_true $push2=, $pop1
+; CHECK-NEXT: i32.const $push3=, -1
+; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
+; CHECK-NEXT: i32.const $push5=, 1
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
+; CHECK-NEXT: return $pop6
+ %1 = icmp eq <4 x i32> %v, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = icmp ne i4 %2, -1
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
+
+
+; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), -1, eq
+; => all_true (set_cc (X), 0, ne)
+; => all_true (X)
+define i32 @all_true_2_4_i32(<4 x i32> %v) {
+; CHECK-LABEL: all_true_2_4_i32:
+; CHECK: .functype all_true_2_4_i32 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0
+; CHECK-NEXT: i32x4.all_true $push2=, $pop1
+; CHECK-NEXT: return $pop2
+ %1 = icmp ne <4 x i32> %v, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = icmp eq i4 %2, -1
+ %conv3 = zext i1 %3 to i32
+ ret i32 %conv3
+}
+
+
>From 0a086da9fb2ac693b42e8980009e0d97b87511ae Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Thu, 26 Jun 2025 12:50:53 -0700
Subject: [PATCH 5/5] [WebAssembly] Add 3 more optimizations for any/all
all_true (setcc x, 0, eq) -> not (any_true x)
any_true (setcc x, 0, ne) -> any_true x
all_true (setcc x, 0, ne) -> all_true x
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 73 +++++++++++--------
.../WebAssembly/simd-setcc-reductions.ll | 22 ++----
2 files changed, 49 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index df539d65cf51c..c42075ba43a56 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3240,40 +3240,53 @@ static SDValue performBitcastCombine(SDNode *N,
return SDValue();
}
-static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
- // any_true (setcc <X>, 0, eq)
- // => not (all_true X)
-
- SDLoc DL(N);
+static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
+ // any_true (setcc <X>, 0, eq) => (not (all_true X))
+ // all_true (setcc <X>, 0, eq) => (not (any_true X))
+ // any_true (setcc <X>, 0, ne) => (any_true X)
+ // all_true (setcc <X>, 0, ne) => (all_true X)
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
- if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
- return SDValue();
+ using namespace llvm::SDPatternMatch;
+ SDLoc DL(N);
+ auto SimdCombiner = // Not static: [&] must capture this call's N/DAG/DL.
+ [&](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType,
+ Intrinsic::WASMIntrinsics InPost, bool ShouldInvert) -> SDValue {
+ if (N->getConstantOperandVal(0) != InPre)
+ return SDValue();
- SDValue SetCC = N->getOperand(1);
- if (SetCC.getOpcode() != ISD::SETCC)
- return SDValue();
+ SDValue LHS;
+ if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
+ m_SpecificCondCode(SetType))))
+ return SDValue();
- SDValue LHS = SetCC->getOperand(0);
- SDValue RHS = SetCC->getOperand(1);
- ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
- EVT LT = LHS.getValueType();
- unsigned NumElts = LT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
- return SDValue();
+ EVT LT = LHS.getValueType();
+ unsigned NumElts = LT.getVectorNumElements();
+ if (LT.getScalarSizeInBits() > 128 / NumElts)
+ return SDValue();
- EVT Width = MVT::getIntegerVT(128 / NumElts);
+ SDValue Ret = DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
+ DL, MVT::i1);
+ if (ShouldInvert)
+ Ret = DAG.getNOT(DL, Ret, MVT::i1);
+ return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+ };
- if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
- return SDValue();
+ if (SDValue AnyTrueEQ = SimdCombiner(Intrinsic::wasm_anytrue, ISD::SETEQ,
+ Intrinsic::wasm_alltrue, true))
+ return AnyTrueEQ;
+ if (SDValue AllTrueEQ = SimdCombiner(Intrinsic::wasm_alltrue, ISD::SETEQ,
+ Intrinsic::wasm_anytrue, true))
+ return AllTrueEQ;
+ if (SDValue AnyTrueNE = SimdCombiner(Intrinsic::wasm_anytrue, ISD::SETNE,
+ Intrinsic::wasm_anytrue, false))
+ return AnyTrueNE;
+ if (SDValue AllTrueNE = SimdCombiner(Intrinsic::wasm_alltrue, ISD::SETNE,
+ Intrinsic::wasm_alltrue, false))
+ return AllTrueNE;
- SDValue Ret = DAG.getZExtOrTrunc(
- DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
- DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
- DL, MVT::i1);
- Ret = DAG.getNOT(DL, Ret, MVT::i1);
- return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
+ return SDValue();
}
template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
@@ -3465,8 +3478,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::TRUNCATE:
return performTruncateCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: {
- if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
- return AnyTrueCombine;
+ if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
+ return AnyAllCombine;
return performLowerPartialReduction(N, DCI.DAG);
}
case ISD::MUL:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 469d2dfc2e26a..503c7e857e6e6 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -89,10 +89,8 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) {
; CHECK-LABEL: any_true_1_4_i32:
; CHECK: .functype any_true_1_4_i32 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0
-; CHECK-NEXT: v128.any_true $push2=, $pop1
-; CHECK-NEXT: return $pop2
+; CHECK-NEXT: v128.any_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp ne <4 x i32> %v, zeroinitializer
%2 = bitcast <4 x i1> %1 to i4
%3 = icmp ne i4 %2, 0
@@ -109,14 +107,8 @@ define i32 @any_true_2_4_i32(<4 x i32> %v) {
; CHECK-LABEL: any_true_2_4_i32:
; CHECK: .functype any_true_2_4_i32 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
-; CHECK-NEXT: i32x4.all_true $push2=, $pop1
-; CHECK-NEXT: i32.const $push3=, -1
-; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
-; CHECK-NEXT: i32.const $push5=, 1
-; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
-; CHECK-NEXT: return $pop6
+; CHECK-NEXT: v128.any_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp eq <4 x i32> %v, zeroinitializer
%2 = bitcast <4 x i1> %1 to i4
%3 = icmp ne i4 %2, -1
@@ -132,10 +124,8 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
; CHECK-LABEL: all_true_2_4_i32:
; CHECK: .functype all_true_2_4_i32 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
-; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0
-; CHECK-NEXT: i32x4.all_true $push2=, $pop1
-; CHECK-NEXT: return $pop2
+; CHECK-NEXT: i32x4.all_true $push0=, $0
+; CHECK-NEXT: return $pop0
%1 = icmp ne <4 x i32> %v, zeroinitializer
%2 = bitcast <4 x i1> %1 to i4
%3 = icmp eq i4 %2, -1
More information about the llvm-commits
mailing list