[llvm] wasm: recognize `any_true` and `all_true` (PR #155885)
Folkert de Vries via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 30 12:19:44 PDT 2025
https://github.com/folkertdev updated https://github.com/llvm/llvm-project/pull/155885
>From 656a2a5c07020fc07e9e67507a5942d60c40438a Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Thu, 28 Aug 2025 01:42:46 +0200
Subject: [PATCH 1/2] wasm: recognize `any_true` and `all_true`
---
.../include/llvm/Target/TargetSelectionDAG.td | 3 +
.../WebAssembly/WebAssemblyISelLowering.cpp | 10 ++
.../WebAssembly/WebAssemblyInstrSIMD.td | 10 ++
.../WebAssemblyTargetTransformInfo.cpp | 16 +++
.../WebAssemblyTargetTransformInfo.h | 1 +
llvm/test/CodeGen/WebAssembly/any-all-true.ll | 125 ++++++++++++++++++
6 files changed, 165 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/any-all-true.ll
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index a4ed62bb5715c..69aa748f0f4f1 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -511,6 +511,9 @@ def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>;
def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>;
def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>;
def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>;
+def vecreduce_and : SDNode<"ISD::VECREDUCE_AND", SDTVecReduce>;
+def vecreduce_or : SDNode<"ISD::VECREDUCE_OR", SDTVecReduce>;
+def vecreduce_xor : SDNode<"ISD::VECREDUCE_XOR", SDTVecReduce>;
def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>;
def vecreduce_fmin : SDNode<"ISD::VECREDUCE_FMIN", SDTFPVecReduce>;
def vecreduce_fmax : SDNode<"ISD::VECREDUCE_FMAX", SDTFPVecReduce>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 35d5c3ed90c91..6e3aab4094459 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -281,6 +281,16 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v16i8, Legal);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v8i16, Legal);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v4i32, Legal);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v16i8, Legal);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v8i16, Legal);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v4i32, Legal);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Legal);
+
// Custom lower bit counting operations for other types to scalarize them.
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 143298b700928..d129313115032 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -997,6 +997,16 @@ def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
}
+def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v16i8 V128:$vec))), (i32 255)), (i32 255), SETEQ)), (ALLTRUE_I8x16 V128:$vec)>;
+def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v8i16 V128:$vec))), (i32 65535)), (i32 65535), SETEQ)), (ALLTRUE_I16x8 V128:$vec)>;
+def : Pat<(i32 (setcc (i32 (vecreduce_and(v4i32 V128:$vec))), (i32 -1), SETEQ)), (ALLTRUE_I32x4 V128:$vec)>;
+def : Pat<(i32 (setcc (i64 (vecreduce_and(v2i64 V128:$vec))), (i64 -1), SETEQ)), (ALLTRUE_I64x2 V128:$vec)>;
+
+def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v16i8 V128:$vec))), (i32 255)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
+def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v8i16 V128:$vec))), (i32 65535)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
+def : Pat<(i32 (setcc (vecreduce_or(v4i32 V128:$vec)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
+def : Pat<(i32 (setcc (vecreduce_or(v2i64 V128:$vec)), (i64 0), SETNE)), (ANYTRUE V128:$vec)>;
+
multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
[(set I32:$dst,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 08fb7586d215e..efba2f8c8f805 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -327,3 +327,19 @@ bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
return false;
}
+
+bool WebAssemblyTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
+  // Targets lacking SIMD128 have no vector instructions to select, so every
+  // reduction is expanded there.
+  if (!ST->hasSIMD128())
+    return true;
+
+  // With SIMD128 the answer depends on the intrinsic: only the bitwise
+  // and/or reductions are left unexpanded, anything else is expanded.
+  const Intrinsic::ID IID = II->getIntrinsicID();
+  const bool IsAndOrReduction = IID == Intrinsic::vector_reduce_and ||
+                                IID == Intrinsic::vector_reduce_or;
+  if (IsAndOrReduction)
+    return false;
+  return true;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index c915eeb07d4fd..996b5e45daad1 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -100,6 +100,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
bool isProfitableToSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
+ bool shouldExpandReduction(const IntrinsicInst *II) const override;
/// @}
};
diff --git a/llvm/test/CodeGen/WebAssembly/any-all-true.ll b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
new file mode 100644
index 0000000000000..0db5b90ebd053
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+
+define zeroext i1 @manual_i8x16_all_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i8x16_all_true:
+; CHECK: .functype manual_i8x16_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <16 x i8>
+ %0 = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %_3)
+ %_0 = icmp eq i8 %0, -1
+ ret i1 %_0
+}
+
+define zeroext i1 @manual_i16x8_all_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i16x8_all_true:
+; CHECK: .functype manual_i16x8_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <8 x i16>
+ %0 = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %_3)
+ %_0 = icmp eq i16 %0, -1
+ ret i1 %_0
+}
+
+define zeroext i1 @manual_i32x4_all_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i32x4_all_true:
+; CHECK: .functype manual_i32x4_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
+ %_0 = icmp eq i32 %0, -1
+ ret i1 %_0
+}
+
+define zeroext i1 @manual_i64x2_all_true(<2 x i64> %a) {
+; CHECK-LABEL: manual_i64x2_all_true:
+; CHECK: .functype manual_i64x2_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64x2.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
+ %_0 = icmp eq i64 %0, -1
+ ret i1 %_0
+}
+
+; ---
+
+define zeroext i1 @manual_i8x16_any_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i8x16_any_true:
+; CHECK: .functype manual_i8x16_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <16 x i8>
+ %0 = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %_3)
+ %_0 = icmp ne i8 %0, 0
+ ret i1 %_0
+}
+
+define i1 @manual_i16x8_any_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i16x8_any_true:
+; CHECK: .functype manual_i16x8_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <8 x i16>
+ %0 = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %_3)
+ %_0 = icmp ne i16 %0, 0
+ ret i1 %_0
+}
+
+define i1 @manual_i32x4_any_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i32x4_any_true:
+; CHECK: .functype manual_i32x4_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
+ %_0 = icmp ne i32 %0, 0
+ ret i1 %_0
+}
+
+
+define zeroext i1 @manual_i64x2_any_true(<2 x i64> %a) {
+; CHECK-LABEL: manual_i64x2_any_true:
+; CHECK: .functype manual_i64x2_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
+ %_0 = icmp ne i64 %0, 0
+ ret i1 %_0
+}
>From 49897d4ecdc9db3fbaf175416c3ab56046dfa09e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sat, 30 Aug 2025 21:02:27 +0200
Subject: [PATCH 2/2] wasm: explicitly combine setcc and vecreduce
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 84 ++++++++++++++++---
.../WebAssembly/WebAssemblyInstrSIMD.td | 10 ---
llvm/test/CodeGen/WebAssembly/any-all-true.ll | 2 +-
3 files changed, 74 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6e3aab4094459..db292ab32f8b2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -281,16 +281,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v16i8, Legal);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v8i16, Legal);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v4i32, Legal);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Legal);
-
- setOperationAction(ISD::VECREDUCE_OR, MVT::v16i8, Legal);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v8i16, Legal);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v4i32, Legal);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Legal);
-
// Custom lower bit counting operations for other types to scalarize them.
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
@@ -3396,6 +3386,75 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
}
+// Combine a setcc of a vecreduce over a 128-bit integer vector, for example:
+//
+// setcc (vecreduce_or(v4i32 V128:$vec)), (i32 0), SETNE
+// ==> ANYTRUE V128:$vec
+//
+// setcc (i32 (vecreduce_and(v4i32 V128:$vec))), (i32 -1), SETEQ
+// ==> ALLTRUE_I32x4 V128:$vec, but only if every lane is known to be 0 or -1:
+// all_true tests lanes for non-zero, the reduction for all bits being set.
+static SDValue combineSetCCVecReduce(SDNode *SetCC,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Reduce = SetCC->getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+
+  // Only constant right-hand sides match; dyn_cast instead of cast so that any
+  // other operand is rejected rather than tripping an assertion.
+  auto *RHS = dyn_cast<ConstantSDNode>(SetCC->getOperand(1));
+  if (!RHS)
+    return SDValue();
+
+  // i8 and i16 reductions are promoted and masked back to the lane width.
+  const ConstantSDNode *Mask = nullptr;
+  if (Reduce.getOpcode() == ISD::AND) {
+    Mask = dyn_cast<ConstantSDNode>(Reduce.getOperand(1));
+    if (!Mask)
+      return SDValue();
+    Reduce = Reduce.getOperand(0);
+  }
+
+  unsigned Opc = Reduce.getOpcode();
+  if (Opc != ISD::VECREDUCE_OR && Opc != ISD::VECREDUCE_AND)
+    return SDValue();
+
+  // The intrinsics are only selectable for the 128-bit integer vector types.
+  SDValue Vec = Reduce.getOperand(0);
+  EVT VecVT = Vec.getValueType();
+  if (VecVT != MVT::v16i8 && VecVT != MVT::v8i16 && VecVT != MVT::v4i32 &&
+      VecVT != MVT::v2i64)
+    return SDValue();
+
+  // Result bits above the lane width are unspecified: they must either not
+  // exist or be cleared by a mask of exactly the lane width.
+  unsigned EltBits = VecVT.getScalarSizeInBits();
+  APInt LaneOnes =
+      APInt::getLowBitsSet(Reduce.getScalarValueSizeInBits(), EltBits);
+  if (Mask ? Mask->getAPIntValue() != LaneOnes : !LaneOnes.isAllOnes())
+    return SDValue();
+
+  unsigned ReduceIntrinsic;
+  if (Opc == ISD::VECREDUCE_OR) {
+    // Any bit set in any lane <=> some lane is non-zero.
+    if (CC != ISD::SETNE || !RHS->isZero())
+      return SDValue();
+    ReduceIntrinsic = Intrinsic::wasm_anytrue;
+  } else {
+    if (CC != ISD::SETEQ || RHS->getAPIntValue() != LaneOnes)
+      return SDValue();
+    // Every lane must be a splat of its sign bit for all_true to agree.
+    if (DAG.ComputeNumSignBits(Vec) != EltBits)
+      return SDValue();
+    ReduceIntrinsic = Intrinsic::wasm_alltrue;
+  }
+
+  SDLoc DL(SetCC);
+  SDValue ID = DAG.getConstant(ReduceIntrinsic, DL, MVT::i32);
+  SDValue Flag = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, {ID, Vec});
+  return DAG.getZExtOrTrunc(Flag, DL, SetCC->getValueType(0));
+}
+
/// Try to convert a i128 comparison to a v16i8 comparison before type
/// legalization splits it up into chunks
static SDValue
@@ -3456,6 +3515,9 @@ static SDValue performSETCCCombine(SDNode *N,
if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
return V;
+ if (SDValue V = combineSetCCVecReduce(N, DCI))
+ return V;
+
SDValue LHS = N->getOperand(0);
if (LHS->getOpcode() != ISD::BITCAST)
return SDValue();
@@ -3470,9 +3532,9 @@ static SDValue performSETCCCombine(SDNode *N,
if (!cast<ConstantSDNode>(N->getOperand(1)))
return SDValue();
-
EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
auto &DAG = DCI.DAG;
+
// setcc (iN (bitcast (vNi1 X))), 0, ne
// ==> any_true (vNi1 X)
if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index d129313115032..143298b700928 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -997,16 +997,6 @@ def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
}
-def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v16i8 V128:$vec))), (i32 255)), (i32 255), SETEQ)), (ALLTRUE_I8x16 V128:$vec)>;
-def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v8i16 V128:$vec))), (i32 65535)), (i32 65535), SETEQ)), (ALLTRUE_I16x8 V128:$vec)>;
-def : Pat<(i32 (setcc (i32 (vecreduce_and(v4i32 V128:$vec))), (i32 -1), SETEQ)), (ALLTRUE_I32x4 V128:$vec)>;
-def : Pat<(i32 (setcc (i64 (vecreduce_and(v2i64 V128:$vec))), (i64 -1), SETEQ)), (ALLTRUE_I64x2 V128:$vec)>;
-
-def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v16i8 V128:$vec))), (i32 255)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
-def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v8i16 V128:$vec))), (i32 65535)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
-def : Pat<(i32 (setcc (vecreduce_or(v4i32 V128:$vec)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
-def : Pat<(i32 (setcc (vecreduce_or(v2i64 V128:$vec)), (i64 0), SETNE)), (ANYTRUE V128:$vec)>;
-
multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
[(set I32:$dst,
diff --git a/llvm/test/CodeGen/WebAssembly/any-all-true.ll b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
index 0db5b90ebd053..b6fd0cde83bec 100644
--- a/llvm/test/CodeGen/WebAssembly/any-all-true.ll
+++ b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
@@ -111,7 +111,7 @@ start:
}
-define zeroext i1 @manual_i64x2_any_true(<2 x i64> %a) {
+define i1 @manual_i64x2_any_true(<2 x i64> %a) {
; CHECK-LABEL: manual_i64x2_any_true:
; CHECK: .functype manual_i64x2_any_true (v128) -> (i32)
; CHECK-NEXT: # %bb.0: # %start
More information about the llvm-commits
mailing list