[llvm] wasm: recognize `any_true` and `all_true` (PR #155885)
Folkert de Vries via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 7 08:47:59 PST 2026
https://github.com/folkertdev updated https://github.com/llvm/llvm-project/pull/155885
>From 1e4521f30f0463cffa6755e562bc9efcd2678fe6 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Thu, 28 Aug 2025 01:42:46 +0200
Subject: [PATCH 1/9] wasm: recognize `any_true` and `all_true`
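An illustrative sketch of the IR shapes this patch matches (it mirrors the test
file added below and is not part of the diff; compile with llc and
-mattr=+simd128 as in the test's RUN line):

  target triple = "wasm32-unknown-unknown"

  declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
  declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)

  ; reduce.and == -1: with this patch selected as i8x16.all_true
  ; (compare manual_i8x16_all_true in the new test)
  define i1 @all_true(<16 x i8> %v) {
    %r = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %v)
    %b = icmp eq i8 %r, -1
    ret i1 %b
  }

  ; reduce.or != 0: with this patch selected as v128.any_true
  ; (compare manual_i8x16_any_true in the new test)
  define i1 @any_true(<16 x i8> %v) {
    %r = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %v)
    %b = icmp ne i8 %r, 0
    ret i1 %b
  }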
---
.../include/llvm/Target/TargetSelectionDAG.td | 3 +
.../WebAssembly/WebAssemblyISelLowering.cpp | 10 ++
.../WebAssembly/WebAssemblyInstrSIMD.td | 10 ++
.../WebAssemblyTargetTransformInfo.cpp | 16 +++
.../WebAssemblyTargetTransformInfo.h | 1 +
llvm/test/CodeGen/WebAssembly/any-all-true.ll | 125 ++++++++++++++++++
6 files changed, 165 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/any-all-true.ll
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index b297fd06711a5..f9797b35c3489 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -525,6 +525,9 @@ def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>;
def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>;
def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>;
def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>;
+def vecreduce_and : SDNode<"ISD::VECREDUCE_AND", SDTVecReduce>;
+def vecreduce_or : SDNode<"ISD::VECREDUCE_OR", SDTVecReduce>;
+def vecreduce_xor : SDNode<"ISD::VECREDUCE_XOR", SDTVecReduce>;
def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>;
def vecreduce_fmin : SDNode<"ISD::VECREDUCE_FMIN", SDTFPVecReduce>;
def vecreduce_fmax : SDNode<"ISD::VECREDUCE_FMAX", SDTFPVecReduce>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 5abf0e8f59d2a..6c775d88f1405 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -292,6 +292,16 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v16i8, Legal);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v8i16, Legal);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v4i32, Legal);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v16i8, Legal);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v8i16, Legal);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v4i32, Legal);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Legal);
+
// Custom lower bit counting operations for other types to scalarize them.
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index fff9049ed39ba..0cc3c984d3249 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1002,6 +1002,16 @@ def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
}
+def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v16i8 V128:$vec))), (i32 255)), (i32 255), SETEQ)), (ALLTRUE_I8x16 V128:$vec)>;
+def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v8i16 V128:$vec))), (i32 65535)), (i32 65535), SETEQ)), (ALLTRUE_I16x8 V128:$vec)>;
+def : Pat<(i32 (setcc (i32 (vecreduce_and(v4i32 V128:$vec))), (i32 -1), SETEQ)), (ALLTRUE_I32x4 V128:$vec)>;
+def : Pat<(i32 (setcc (i64 (vecreduce_and(v2i64 V128:$vec))), (i64 -1), SETEQ)), (ALLTRUE_I64x2 V128:$vec)>;
+
+def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v16i8 V128:$vec))), (i32 255)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
+def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v8i16 V128:$vec))), (i32 65535)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
+def : Pat<(i32 (setcc (vecreduce_or(v4i32 V128:$vec)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
+def : Pat<(i32 (setcc (vecreduce_or(v2i64 V128:$vec)), (i64 0), SETNE)), (ANYTRUE V128:$vec)>;
+
multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
[(set I32:$dst,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 33498805112c0..a02662bfee04d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -616,3 +616,19 @@ WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
return std::nullopt;
}
+
+bool WebAssemblyTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
+ // Always expand on Subtargets without vector instructions.
+ if (!ST->hasSIMD128())
+ return true;
+
+ // Whether or not to expand is a per-intrinsic decision.
+ switch (II->getIntrinsicID()) {
+ default:
+ return true;
+ case Intrinsic::vector_reduce_and:
+ return false;
+ case Intrinsic::vector_reduce_or:
+ return false;
+ }
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 924249e051321..37ae003abcadb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -113,6 +113,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
+ bool shouldExpandReduction(const IntrinsicInst *II) const override;
/// @}
};
diff --git a/llvm/test/CodeGen/WebAssembly/any-all-true.ll b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
new file mode 100644
index 0000000000000..0db5b90ebd053
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+
+define zeroext i1 @manual_i8x16_all_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i8x16_all_true:
+; CHECK: .functype manual_i8x16_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <16 x i8>
+ %0 = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %_3)
+ %_0 = icmp eq i8 %0, -1
+ ret i1 %_0
+}
+
+define zeroext i1 @manual_i16x8_all_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i16x8_all_true:
+; CHECK: .functype manual_i16x8_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <8 x i16>
+ %0 = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %_3)
+ %_0 = icmp eq i16 %0, -1
+ ret i1 %_0
+}
+
+define zeroext i1 @manual_i32x4_all_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i32x4_all_true:
+; CHECK: .functype manual_i32x4_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
+ %_0 = icmp eq i32 %0, -1
+ ret i1 %_0
+}
+
+define zeroext i1 @manual_i64x2_all_true(<2 x i64> %a) {
+; CHECK-LABEL: manual_i64x2_all_true:
+; CHECK: .functype manual_i64x2_all_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64x2.all_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
+ %_0 = icmp eq i64 %0, -1
+ ret i1 %_0
+}
+
+; ---
+
+define zeroext i1 @manual_i8x16_any_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i8x16_any_true:
+; CHECK: .functype manual_i8x16_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <16 x i8>
+ %0 = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %_3)
+ %_0 = icmp ne i8 %0, 0
+ ret i1 %_0
+}
+
+define i1 @i16x8_any_true(<4 x i32> %a) {
+; CHECK-LABEL: i16x8_any_true:
+; CHECK: .functype i16x8_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %_3 = bitcast <4 x i32> %a to <8 x i16>
+ %0 = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %_3)
+ %_0 = icmp ne i16 %0, 0
+ ret i1 %_0
+}
+
+define i1 @manual_i32x4_any_true(<4 x i32> %a) {
+; CHECK-LABEL: manual_i32x4_any_true:
+; CHECK: .functype manual_i32x4_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
+ %_0 = icmp ne i32 %0, 0
+ ret i1 %_0
+}
+
+
+define zeroext i1 @manual_i64x2_any_true(<2 x i64> %a) {
+; CHECK-LABEL: manual_i64x2_any_true:
+; CHECK: .functype manual_i64x2_any_true (v128) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.any_true
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
+ %_0 = icmp ne i64 %0, 0
+ ret i1 %_0
+}
>From f9b52b6f1ebe3bf3546f9fa321744c447220854c Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sat, 30 Aug 2025 21:02:27 +0200
Subject: [PATCH 2/9] wasm: explicitly combine setcc and vecreduce
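The combine rewrites a matching setcc into the backend's existing any_true /
all_true intrinsic nodes, so instruction selection goes through the patterns
that already exist for those intrinsics (the setne/seteq patterns in
WebAssemblyInstrSIMD.td). Roughly the IR-level equivalent of what the combine
produces, as a sketch that is not part of the diff (the intrinsic name follows
its use elsewhere in the WebAssembly tests):

  declare i32 @llvm.wasm.anytrue.v4i32(<4 x i32>)

  define i1 @any_true_via_intrinsic(<4 x i32> %v) {
    ; matched by the existing setne(intrinsic, 0) pattern and
    ; selected to v128.any_true
    %r = tail call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %v)
    %b = icmp ne i32 %r, 0
    ret i1 %b
  }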
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 122 ++++++++++++++++--
.../WebAssembly/WebAssemblyISelLowering.h | 1 +
.../WebAssembly/WebAssemblyInstrSIMD.td | 10 --
.../WebAssemblyTargetTransformInfo.cpp | 1 -
llvm/test/CodeGen/WebAssembly/any-all-true.ll | 10 +-
5 files changed, 116 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6c775d88f1405..8f3ed79074666 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -244,12 +244,20 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// We have custom shuffle lowering to expose the shuffle mask
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
- MVT::v2f64})
+ MVT::v2f64}) {
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
- if (Subtarget->hasFP16())
+ setOperationAction(ISD::VECREDUCE_OR, T, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, T, Custom);
+ }
+
+ if (Subtarget->hasFP16()) {
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v8f16, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v8f16, Custom);
+ }
+
// Support splatting
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
@@ -292,16 +300,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v16i8, Legal);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v8i16, Legal);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v4i32, Legal);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Legal);
-
- setOperationAction(ISD::VECREDUCE_OR, MVT::v16i8, Legal);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v8i16, Legal);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v4i32, Legal);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Legal);
-
// Custom lower bit counting operations for other types to scalarize them.
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
@@ -1749,6 +1747,9 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_AND:
+ return LowerVECREDUCE(Op, DAG);
case ISD::SETCC:
return LowerSETCC(Op, DAG);
case ISD::SHL:
@@ -2728,6 +2729,61 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}
+static SDValue emitShuffleReduceTree(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Vec, unsigned BaseOpc) {
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.isVector() && "expected vector");
+
+ auto foldInHalf = [&](ArrayRef<int> Mask) -> void {
+ SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Vec, Vec, Mask);
+ Vec = DAG.getNode(BaseOpc, DL, VecVT, Vec, Shuf);
+ };
+
+ if (VecVT == MVT::v16i8) {
+ foldInHalf({8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0});
+ foldInHalf({4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
+ foldInHalf({2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
+ foldInHalf({1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
+ return Vec;
+ } else if (VecVT == MVT::v8i16) {
+ foldInHalf({4, 5, 6, 7, 0, 0, 0, 0});
+ foldInHalf({2, 3, 0, 0, 0, 0, 0, 0});
+ foldInHalf({1, 0, 0, 0, 0, 0, 0, 0});
+ return Vec;
+ } else if (VecVT == MVT::v4i32) {
+ foldInHalf({2, 3, 0, 0});
+ foldInHalf({1, 0, 0, 0});
+ return Vec;
+ } else if (VecVT == MVT::v2i64) {
+ foldInHalf({1, 0});
+ return Vec;
+ }
+
+ return SDValue();
+}
+
+SDValue WebAssemblyTargetLowering::LowerVECREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SDLoc DL(Op);
+ // Only ISD::VECREDUCE_AND and ISD::VECREDUCE_OR are custom-lowered currently.
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
+ if (BaseOpc != ISD::AND && BaseOpc != ISD::OR)
+ return SDValue();
+
+ if (!Subtarget->hasSIMD128())
+ return SDValue();
+
+ SDValue ReducedVec =
+ emitShuffleReduceTree(DAG, DL, Op.getOperand(0), BaseOpc);
+ if (!ReducedVec)
+ return SDValue();
+
+ // Extract lane 0 (the reduced value) and convert to the result type.
+ EVT EltVT = ReducedVec.getValueType().getVectorElementType();
+ SDValue Lane0 = DAG.getExtractVectorElt(DL, EltVT, ReducedVec, 0);
+ return DAG.getZExtOrTrunc(Lane0, DL, Op.getValueType());
+}
+
SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -3402,6 +3458,45 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
}
+// Combine a setcc of a vecreduce, for example:
+//
+// setcc (vecreduce_or(v4i32 V128:$vec)), (i32 0), SETNE
+// ==> ANYTRUE V128:$vec
+//
+// setcc (i32 (vecreduce_and(v4i32 V128:$vec))), (i32 0), SETNE
+// ==> ALLTRUE_I32x4 V128:$vec
+static SDValue combineSetCCVecReduce(SDNode *SetCC,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Reduce = SetCC->getOperand(0);
+ SDValue Constant = SetCC->getOperand(1);
+ SDValue Cond = SetCC->getOperand(2);
+
+ unsigned ReduceIntrinsic;
+ if (Reduce->getOpcode() == ISD::VECREDUCE_OR) {
+ ReduceIntrinsic = Intrinsic::wasm_anytrue;
+ } else if (Reduce->getOpcode() == ISD::VECREDUCE_AND) {
+ ReduceIntrinsic = Intrinsic::wasm_alltrue;
+ } else {
+ return SDValue();
+ }
+
+ if (cast<CondCodeSDNode>(Cond)->get() != ISD::SETNE)
+ return SDValue();
+
+ if (cast<ConstantSDNode>(Constant)->getSExtValue() != 0)
+ return SDValue();
+
+ SDLoc DL(SetCC);
+ SelectionDAG &DAG = DCI.DAG;
+
+ SDValue Match = Reduce->getOperand(0);
+ SDValue Intrinsic = DAG.getConstant(ReduceIntrinsic, DL, MVT::i32);
+ SDValue Chain =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, {Intrinsic, Match});
+
+ return DAG.getZExtOrTrunc(Chain, DL, MVT::i1);
+}
+
/// Try to convert a i128 comparison to a v16i8 comparison before type
/// legalization splits it up into chunks
static SDValue
@@ -3462,6 +3557,9 @@ static SDValue performSETCCCombine(SDNode *N,
if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
return V;
+ if (SDValue V = combineSetCCVecReduce(N, DCI))
+ return V;
+
SDValue LHS = N->getOperand(0);
if (LHS->getOpcode() != ISD::BITCAST)
return SDValue();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 204384f06ab25..9e6c4dcb07d1c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -115,6 +115,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 0cc3c984d3249..fff9049ed39ba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1002,16 +1002,6 @@ def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
}
-def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v16i8 V128:$vec))), (i32 255)), (i32 255), SETEQ)), (ALLTRUE_I8x16 V128:$vec)>;
-def : Pat<(i32 (setcc (and (i32 (vecreduce_and (v8i16 V128:$vec))), (i32 65535)), (i32 65535), SETEQ)), (ALLTRUE_I16x8 V128:$vec)>;
-def : Pat<(i32 (setcc (i32 (vecreduce_and(v4i32 V128:$vec))), (i32 -1), SETEQ)), (ALLTRUE_I32x4 V128:$vec)>;
-def : Pat<(i32 (setcc (i64 (vecreduce_and(v2i64 V128:$vec))), (i64 -1), SETEQ)), (ALLTRUE_I64x2 V128:$vec)>;
-
-def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v16i8 V128:$vec))), (i32 255)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
-def : Pat<(i32 (setcc (and (i32 (vecreduce_or(v8i16 V128:$vec))), (i32 65535)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
-def : Pat<(i32 (setcc (vecreduce_or(v4i32 V128:$vec)), (i32 0), SETNE)), (ANYTRUE V128:$vec)>;
-def : Pat<(i32 (setcc (vecreduce_or(v2i64 V128:$vec)), (i64 0), SETNE)), (ANYTRUE V128:$vec)>;
-
multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
[(set I32:$dst,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index a02662bfee04d..e4639334245df 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -627,7 +627,6 @@ bool WebAssemblyTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
default:
return true;
case Intrinsic::vector_reduce_and:
- return false;
case Intrinsic::vector_reduce_or:
return false;
}
diff --git a/llvm/test/CodeGen/WebAssembly/any-all-true.ll b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
index 0db5b90ebd053..9f973f37b913c 100644
--- a/llvm/test/CodeGen/WebAssembly/any-all-true.ll
+++ b/llvm/test/CodeGen/WebAssembly/any-all-true.ll
@@ -23,7 +23,7 @@ define zeroext i1 @manual_i8x16_all_true(<4 x i32> %a) {
start:
%_3 = bitcast <4 x i32> %a to <16 x i8>
%0 = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %_3)
- %_0 = icmp eq i8 %0, -1
+ %_0 = icmp ne i8 %0, 0
ret i1 %_0
}
@@ -37,7 +37,7 @@ define zeroext i1 @manual_i16x8_all_true(<4 x i32> %a) {
start:
%_3 = bitcast <4 x i32> %a to <8 x i16>
%0 = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %_3)
- %_0 = icmp eq i16 %0, -1
+ %_0 = icmp ne i16 %0, 0
ret i1 %_0
}
@@ -50,7 +50,7 @@ define zeroext i1 @manual_i32x4_all_true(<4 x i32> %a) {
; CHECK-NEXT: # fallthrough-return
start:
%0 = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
- %_0 = icmp eq i32 %0, -1
+ %_0 = icmp ne i32 %0, 0
ret i1 %_0
}
@@ -63,7 +63,7 @@ define zeroext i1 @manual_i64x2_all_true(<2 x i64> %a) {
; CHECK-NEXT: # fallthrough-return
start:
%0 = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
- %_0 = icmp eq i64 %0, -1
+ %_0 = icmp ne i64 %0, 0
ret i1 %_0
}
@@ -111,7 +111,7 @@ start:
}
-define zeroext i1 @manual_i64x2_any_true(<2 x i64> %a) {
+define i1 @manual_i64x2_any_true(<2 x i64> %a) {
; CHECK-LABEL: manual_i64x2_any_true:
; CHECK: .functype manual_i64x2_any_true (v128) -> (i32)
; CHECK-NEXT: # %bb.0: # %start
>From 195f9d99c92389c30d89c5fdd231360bb854d5bc Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sun, 1 Feb 2026 19:16:30 +0100
Subject: [PATCH 3/9] only keep regular vector types (expand anything else)
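With this change the TTI hook keeps the reduction intrinsic (returns false)
only when the operand is a 128-bit fixed vector; for any other type it returns
true and the generic expansion is used. A sketch of the two cases, not part of
the diff (what actually gets emitted for the odd-sized type after legalization
is what the updated simd-vecreduce-bool.ll checks):

  declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
  declare i1 @llvm.vector.reduce.or.v7i1(<7 x i1>)

  ; <4 x i32> is a 128-bit fixed vector: the intrinsic is kept
  define i32 @keep_v4i32(<4 x i32> %v) {
    %r = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %v)
    ret i32 %r
  }

  ; <7 x i1> is not 128 bits wide: shouldExpandReduction returns true
  define i1 @expand_v7i1(<7 x i1> %x) {
    %r = tail call i1 @llvm.vector.reduce.or.v7i1(<7 x i1> %x)
    ret i1 %r
  }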
---
.../WebAssemblyTargetTransformInfo.cpp | 6 ++-
.../WebAssembly/simd-vecreduce-bool.ll | 54 +++++++++++++------
2 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index e4639334245df..cbd898e5fd812 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -627,7 +627,9 @@ bool WebAssemblyTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
default:
return true;
case Intrinsic::vector_reduce_and:
- case Intrinsic::vector_reduce_or:
- return false;
+ case Intrinsic::vector_reduce_or: {
+ auto *VTy = dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
+ return !VTy || VTy->getPrimitiveSizeInBits() != 128;
+ }
}
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index 70c6baf2be005..d8d55f3a99543 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -244,13 +244,25 @@ define i1 @test_any_v7i1(<7 x i1> %x) {
; CHECK-LABEL: test_any_v7i1:
; CHECK: .functype test_any_v7i1 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.or $push0=, $0, $1
-; CHECK-NEXT: i32.or $push1=, $pop0, $2
-; CHECK-NEXT: i32.or $push2=, $pop1, $3
-; CHECK-NEXT: i32.or $push3=, $pop2, $4
-; CHECK-NEXT: i32.or $push4=, $pop3, $5
-; CHECK-NEXT: i32.or $push5=, $pop4, $6
-; CHECK-NEXT: return $pop5
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 0, $0
+; CHECK-NEXT: i16x8.replace_lane $push2=, $pop1, 1, $1
+; CHECK-NEXT: i16x8.replace_lane $push3=, $pop2, 2, $2
+; CHECK-NEXT: i16x8.replace_lane $push4=, $pop3, 3, $3
+; CHECK-NEXT: i16x8.replace_lane $push5=, $pop4, 4, $4
+; CHECK-NEXT: i16x8.replace_lane $push6=, $pop5, 5, $5
+; CHECK-NEXT: i16x8.replace_lane $push17=, $pop6, 6, $6
+; CHECK-NEXT: local.tee $push16=, $7=, $pop17
+; CHECK-NEXT: i8x16.shuffle $push7=, $7, $7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK-NEXT: v128.or $push15=, $pop16, $pop7
+; CHECK-NEXT: local.tee $push14=, $7=, $pop15
+; CHECK-NEXT: i8x16.shuffle $push8=, $7, $7, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK-NEXT: v128.or $push13=, $pop14, $pop8
+; CHECK-NEXT: local.tee $push12=, $7=, $pop13
+; CHECK-NEXT: i8x16.shuffle $push9=, $7, $7, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK-NEXT: v128.or $push10=, $pop12, $pop9
+; CHECK-NEXT: i16x8.extract_lane_u $push11=, $pop10, 0
+; CHECK-NEXT: return $pop11
%ret = call i1 @llvm.vector.reduce.or.v7i1(<7 x i1> %x)
ret i1 %ret
}
@@ -259,15 +271,25 @@ define i1 @test_all_v7i1(<7 x i1> %x) {
; CHECK-LABEL: test_all_v7i1:
; CHECK: .functype test_all_v7i1 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.and $push0=, $0, $1
-; CHECK-NEXT: i32.and $push1=, $pop0, $2
-; CHECK-NEXT: i32.and $push2=, $pop1, $3
-; CHECK-NEXT: i32.and $push3=, $pop2, $4
-; CHECK-NEXT: i32.and $push4=, $pop3, $5
-; CHECK-NEXT: i32.and $push5=, $pop4, $6
-; CHECK-NEXT: i32.const $push6=, 1
-; CHECK-NEXT: i32.and $push7=, $pop5, $pop6
-; CHECK-NEXT: return $pop7
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 1
+; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 0, $0
+; CHECK-NEXT: i16x8.replace_lane $push2=, $pop1, 1, $1
+; CHECK-NEXT: i16x8.replace_lane $push3=, $pop2, 2, $2
+; CHECK-NEXT: i16x8.replace_lane $push4=, $pop3, 3, $3
+; CHECK-NEXT: i16x8.replace_lane $push5=, $pop4, 4, $4
+; CHECK-NEXT: i16x8.replace_lane $push6=, $pop5, 5, $5
+; CHECK-NEXT: i16x8.replace_lane $push17=, $pop6, 6, $6
+; CHECK-NEXT: local.tee $push16=, $7=, $pop17
+; CHECK-NEXT: i8x16.shuffle $push7=, $7, $7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK-NEXT: v128.and $push15=, $pop16, $pop7
+; CHECK-NEXT: local.tee $push14=, $7=, $pop15
+; CHECK-NEXT: i8x16.shuffle $push8=, $7, $7, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK-NEXT: v128.and $push13=, $pop14, $pop8
+; CHECK-NEXT: local.tee $push12=, $7=, $pop13
+; CHECK-NEXT: i8x16.shuffle $push9=, $7, $7, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK-NEXT: v128.and $push10=, $pop12, $pop9
+; CHECK-NEXT: i16x8.extract_lane_u $push11=, $pop10, 0
+; CHECK-NEXT: return $pop11
%ret = call i1 @llvm.vector.reduce.and.v7i1(<7 x i1> %x)
ret i1 %ret
}
>From 0ea885bef4cc6c34ab7b85cdcf01d810e765e3fc Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Tue, 3 Feb 2026 22:43:25 +0100
Subject: [PATCH 4/9] for vectors with fewer than 16 elements, use a scalar
expansion
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 6 +++
.../WebAssembly/simd-vecreduce-bool.ll | 54 ++++++-------------
2 files changed, 22 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 8f3ed79074666..16782deba47bb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2773,6 +2773,12 @@ SDValue WebAssemblyTargetLowering::LowerVECREDUCE(SDValue Op,
if (!Subtarget->hasSIMD128())
return SDValue();
+ // Expand to a sequence of scalar operations when the vector is small.
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ if (VecVT.getVectorNumElements() < 16)
+ return SDValue();
+
SDValue ReducedVec =
emitShuffleReduceTree(DAG, DL, Op.getOperand(0), BaseOpc);
if (!ReducedVec)
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index d8d55f3a99543..70c6baf2be005 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -244,25 +244,13 @@ define i1 @test_any_v7i1(<7 x i1> %x) {
; CHECK-LABEL: test_any_v7i1:
; CHECK: .functype test_any_v7i1 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 0, $0
-; CHECK-NEXT: i16x8.replace_lane $push2=, $pop1, 1, $1
-; CHECK-NEXT: i16x8.replace_lane $push3=, $pop2, 2, $2
-; CHECK-NEXT: i16x8.replace_lane $push4=, $pop3, 3, $3
-; CHECK-NEXT: i16x8.replace_lane $push5=, $pop4, 4, $4
-; CHECK-NEXT: i16x8.replace_lane $push6=, $pop5, 5, $5
-; CHECK-NEXT: i16x8.replace_lane $push17=, $pop6, 6, $6
-; CHECK-NEXT: local.tee $push16=, $7=, $pop17
-; CHECK-NEXT: i8x16.shuffle $push7=, $7, $7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK-NEXT: v128.or $push15=, $pop16, $pop7
-; CHECK-NEXT: local.tee $push14=, $7=, $pop15
-; CHECK-NEXT: i8x16.shuffle $push8=, $7, $7, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK-NEXT: v128.or $push13=, $pop14, $pop8
-; CHECK-NEXT: local.tee $push12=, $7=, $pop13
-; CHECK-NEXT: i8x16.shuffle $push9=, $7, $7, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK-NEXT: v128.or $push10=, $pop12, $pop9
-; CHECK-NEXT: i16x8.extract_lane_u $push11=, $pop10, 0
-; CHECK-NEXT: return $pop11
+; CHECK-NEXT: i32.or $push0=, $0, $1
+; CHECK-NEXT: i32.or $push1=, $pop0, $2
+; CHECK-NEXT: i32.or $push2=, $pop1, $3
+; CHECK-NEXT: i32.or $push3=, $pop2, $4
+; CHECK-NEXT: i32.or $push4=, $pop3, $5
+; CHECK-NEXT: i32.or $push5=, $pop4, $6
+; CHECK-NEXT: return $pop5
%ret = call i1 @llvm.vector.reduce.or.v7i1(<7 x i1> %x)
ret i1 %ret
}
@@ -271,25 +259,15 @@ define i1 @test_all_v7i1(<7 x i1> %x) {
; CHECK-LABEL: test_all_v7i1:
; CHECK: .functype test_all_v7i1 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 1
-; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 0, $0
-; CHECK-NEXT: i16x8.replace_lane $push2=, $pop1, 1, $1
-; CHECK-NEXT: i16x8.replace_lane $push3=, $pop2, 2, $2
-; CHECK-NEXT: i16x8.replace_lane $push4=, $pop3, 3, $3
-; CHECK-NEXT: i16x8.replace_lane $push5=, $pop4, 4, $4
-; CHECK-NEXT: i16x8.replace_lane $push6=, $pop5, 5, $5
-; CHECK-NEXT: i16x8.replace_lane $push17=, $pop6, 6, $6
-; CHECK-NEXT: local.tee $push16=, $7=, $pop17
-; CHECK-NEXT: i8x16.shuffle $push7=, $7, $7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK-NEXT: v128.and $push15=, $pop16, $pop7
-; CHECK-NEXT: local.tee $push14=, $7=, $pop15
-; CHECK-NEXT: i8x16.shuffle $push8=, $7, $7, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK-NEXT: v128.and $push13=, $pop14, $pop8
-; CHECK-NEXT: local.tee $push12=, $7=, $pop13
-; CHECK-NEXT: i8x16.shuffle $push9=, $7, $7, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK-NEXT: v128.and $push10=, $pop12, $pop9
-; CHECK-NEXT: i16x8.extract_lane_u $push11=, $pop10, 0
-; CHECK-NEXT: return $pop11
+; CHECK-NEXT: i32.and $push0=, $0, $1
+; CHECK-NEXT: i32.and $push1=, $pop0, $2
+; CHECK-NEXT: i32.and $push2=, $pop1, $3
+; CHECK-NEXT: i32.and $push3=, $pop2, $4
+; CHECK-NEXT: i32.and $push4=, $pop3, $5
+; CHECK-NEXT: i32.and $push5=, $pop4, $6
+; CHECK-NEXT: i32.const $push6=, 1
+; CHECK-NEXT: i32.and $push7=, $pop5, $pop6
+; CHECK-NEXT: return $pop7
%ret = call i1 @llvm.vector.reduce.and.v7i1(<7 x i1> %x)
ret i1 %ret
}
>From a9051c726d3c28c69f2aae4e768ec33c4fd06099 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 4 Feb 2026 21:09:09 +0100
Subject: [PATCH 5/9] update tests
---
.../test/CodeGen/WebAssembly/vector-reduce.ll | 94 +++++++++++--------
1 file changed, 54 insertions(+), 40 deletions(-)
diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
index 4c30a3adf2378..982d06230667e 100644
--- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
+++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
@@ -146,9 +146,9 @@ define i64 @pairwise_and_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_and_v2i64:
; SIMD128: .functype pairwise_and_v2i64 (v128) -> (i64)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT: v128.and $push1=, $0, $pop0
-; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
+; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 0
+; SIMD128-NEXT: i64x2.extract_lane $push0=, $0, 1
+; SIMD128-NEXT: i64.and $push2=, $pop1, $pop0
; SIMD128-NEXT: return $pop2
%res = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %arg)
ret i64 %res
@@ -158,13 +158,14 @@ define i32 @pairwise_and_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_and_v4i32:
; SIMD128: .functype pairwise_and_v4i32 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: v128.and $push5=, $0, $pop0
-; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
-; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: v128.and $push2=, $pop4, $pop1
-; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
-; SIMD128-NEXT: return $pop3
+; SIMD128-NEXT: i32x4.extract_lane $push1=, $0, 0
+; SIMD128-NEXT: i32x4.extract_lane $push0=, $0, 1
+; SIMD128-NEXT: i32.and $push2=, $pop1, $pop0
+; SIMD128-NEXT: i32x4.extract_lane $push3=, $0, 2
+; SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
+; SIMD128-NEXT: i32x4.extract_lane $push5=, $0, 3
+; SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
+; SIMD128-NEXT: return $pop6
%res = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %arg)
ret i32 %res
}
@@ -173,16 +174,22 @@ define i16 @pairwise_and_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_and_v8i16:
; SIMD128: .functype pairwise_and_v8i16 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT: v128.and $push8=, $0, $pop0
-; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
-; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT: v128.and $push6=, $pop7, $pop1
-; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
-; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT: v128.and $push3=, $pop5, $pop2
-; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
-; SIMD128-NEXT: return $pop4
+; SIMD128-NEXT: i16x8.extract_lane_u $push1=, $0, 0
+; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $0, 1
+; SIMD128-NEXT: i32.and $push2=, $pop1, $pop0
+; SIMD128-NEXT: i16x8.extract_lane_u $push3=, $0, 2
+; SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
+; SIMD128-NEXT: i16x8.extract_lane_u $push5=, $0, 3
+; SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
+; SIMD128-NEXT: i16x8.extract_lane_u $push7=, $0, 4
+; SIMD128-NEXT: i32.and $push8=, $pop6, $pop7
+; SIMD128-NEXT: i16x8.extract_lane_u $push9=, $0, 5
+; SIMD128-NEXT: i32.and $push10=, $pop8, $pop9
+; SIMD128-NEXT: i16x8.extract_lane_u $push11=, $0, 6
+; SIMD128-NEXT: i32.and $push12=, $pop10, $pop11
+; SIMD128-NEXT: i16x8.extract_lane_u $push13=, $0, 7
+; SIMD128-NEXT: i32.and $push14=, $pop12, $pop13
+; SIMD128-NEXT: return $pop14
%res = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %arg)
ret i16 %res
}
@@ -212,9 +219,9 @@ define i64 @pairwise_or_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_or_v2i64:
; SIMD128: .functype pairwise_or_v2i64 (v128) -> (i64)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT: v128.or $push1=, $0, $pop0
-; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
+; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 0
+; SIMD128-NEXT: i64x2.extract_lane $push0=, $0, 1
+; SIMD128-NEXT: i64.or $push2=, $pop1, $pop0
; SIMD128-NEXT: return $pop2
%res = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %arg)
ret i64 %res
@@ -224,13 +231,14 @@ define i32 @pairwise_or_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_or_v4i32:
; SIMD128: .functype pairwise_or_v4i32 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: v128.or $push5=, $0, $pop0
-; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
-; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: v128.or $push2=, $pop4, $pop1
-; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
-; SIMD128-NEXT: return $pop3
+; SIMD128-NEXT: i32x4.extract_lane $push1=, $0, 0
+; SIMD128-NEXT: i32x4.extract_lane $push0=, $0, 1
+; SIMD128-NEXT: i32.or $push2=, $pop1, $pop0
+; SIMD128-NEXT: i32x4.extract_lane $push3=, $0, 2
+; SIMD128-NEXT: i32.or $push4=, $pop2, $pop3
+; SIMD128-NEXT: i32x4.extract_lane $push5=, $0, 3
+; SIMD128-NEXT: i32.or $push6=, $pop4, $pop5
+; SIMD128-NEXT: return $pop6
%res = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %arg)
ret i32 %res
}
@@ -239,16 +247,22 @@ define i16 @pairwise_or_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_or_v8i16:
; SIMD128: .functype pairwise_or_v8i16 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT: v128.or $push8=, $0, $pop0
-; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
-; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT: v128.or $push6=, $pop7, $pop1
-; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
-; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
-; SIMD128-NEXT: v128.or $push3=, $pop5, $pop2
-; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
-; SIMD128-NEXT: return $pop4
+; SIMD128-NEXT: i16x8.extract_lane_u $push1=, $0, 0
+; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $0, 1
+; SIMD128-NEXT: i32.or $push2=, $pop1, $pop0
+; SIMD128-NEXT: i16x8.extract_lane_u $push3=, $0, 2
+; SIMD128-NEXT: i32.or $push4=, $pop2, $pop3
+; SIMD128-NEXT: i16x8.extract_lane_u $push5=, $0, 3
+; SIMD128-NEXT: i32.or $push6=, $pop4, $pop5
+; SIMD128-NEXT: i16x8.extract_lane_u $push7=, $0, 4
+; SIMD128-NEXT: i32.or $push8=, $pop6, $pop7
+; SIMD128-NEXT: i16x8.extract_lane_u $push9=, $0, 5
+; SIMD128-NEXT: i32.or $push10=, $pop8, $pop9
+; SIMD128-NEXT: i16x8.extract_lane_u $push11=, $0, 6
+; SIMD128-NEXT: i32.or $push12=, $pop10, $pop11
+; SIMD128-NEXT: i16x8.extract_lane_u $push13=, $0, 7
+; SIMD128-NEXT: i32.or $push14=, $pop12, $pop13
+; SIMD128-NEXT: return $pop14
%res = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %arg)
ret i16 %res
}
>From e766566fb60905d6502be117a78ed0c674084a37 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 4 Feb 2026 21:11:28 +0100
Subject: [PATCH 6/9] no else after return, i guess
---
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 16782deba47bb..ba0225a0da499 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2745,16 +2745,19 @@ static SDValue emitShuffleReduceTree(SelectionDAG &DAG, const SDLoc &DL,
foldInHalf({2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
foldInHalf({1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
return Vec;
- } else if (VecVT == MVT::v8i16) {
+ }
+ if (VecVT == MVT::v8i16) {
foldInHalf({4, 5, 6, 7, 0, 0, 0, 0});
foldInHalf({2, 3, 0, 0, 0, 0, 0, 0});
foldInHalf({1, 0, 0, 0, 0, 0, 0, 0});
return Vec;
- } else if (VecVT == MVT::v4i32) {
+ }
+ if (VecVT == MVT::v4i32) {
foldInHalf({2, 3, 0, 0});
foldInHalf({1, 0, 0, 0});
return Vec;
- } else if (VecVT == MVT::v2i64) {
+ }
+ if (VecVT == MVT::v2i64) {
foldInHalf({1, 0});
return Vec;
}
>From 9b47ab6dc809e33144389964b0d30486724c47e8 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sat, 7 Feb 2026 14:04:03 +0100
Subject: [PATCH 7/9] use `expandVecReduce`
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 30 +------
.../test/CodeGen/WebAssembly/vector-reduce.ll | 90 +++++++++++++------
2 files changed, 65 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index ba0225a0da499..7b673fb5275fa 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1749,7 +1749,7 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_AND:
- return LowerVECREDUCE(Op, DAG);
+ return expandVecReduce(Op.getNode(), DAG);
case ISD::SETCC:
return LowerSETCC(Op, DAG);
case ISD::SHL:
@@ -2765,34 +2765,6 @@ static SDValue emitShuffleReduceTree(SelectionDAG &DAG, const SDLoc &DL,
return SDValue();
}
-SDValue WebAssemblyTargetLowering::LowerVECREDUCE(SDValue Op,
- SelectionDAG &DAG) const {
- const SDLoc DL(Op);
- // Only ISD::VECREDUCE_AND and ISD::VECREDUCE_OR are custom-lowered currently.
- unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
- if (BaseOpc != ISD::AND && BaseOpc != ISD::OR)
- return SDValue();
-
- if (!Subtarget->hasSIMD128())
- return SDValue();
-
- // Expand to a sequence of scalar operations when the vector is small.
- SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
- if (VecVT.getVectorNumElements() < 16)
- return SDValue();
-
- SDValue ReducedVec =
- emitShuffleReduceTree(DAG, DL, Op.getOperand(0), BaseOpc);
- if (!ReducedVec)
- return SDValue();
-
- // Extract lane 0 (the reduced value) and convert to the result type.
- EVT EltVT = ReducedVec.getValueType().getVectorElementType();
- SDValue Lane0 = DAG.getExtractVectorElt(DL, EltVT, ReducedVec, 0);
- return DAG.getZExtOrTrunc(Lane0, DL, Op.getValueType());
-}
-
SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
index 982d06230667e..21bf9ed76d449 100644
--- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
+++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
@@ -198,19 +198,38 @@ define i8 @pairwise_and_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_and_v16i8:
; SIMD128: .functype pairwise_and_v16i8 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.and $push11=, $0, $pop0
-; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
-; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.and $push9=, $pop10, $pop1
-; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
-; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.and $push7=, $pop8, $pop2
-; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
-; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.and $push4=, $pop6, $pop3
-; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
-; SIMD128-NEXT: return $pop5
+; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 0
+; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $0, 1
+; SIMD128-NEXT: i32.and $push2=, $pop1, $pop0
+; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $0, 2
+; SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
+; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $0, 3
+; SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
+; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $0, 4
+; SIMD128-NEXT: i32.and $push8=, $pop6, $pop7
+; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 5
+; SIMD128-NEXT: i32.and $push10=, $pop8, $pop9
+; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 6
+; SIMD128-NEXT: i32.and $push12=, $pop10, $pop11
+; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 7
+; SIMD128-NEXT: i32.and $push14=, $pop12, $pop13
+; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $0, 8
+; SIMD128-NEXT: i32.and $push16=, $pop14, $pop15
+; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 9
+; SIMD128-NEXT: i32.and $push18=, $pop16, $pop17
+; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 10
+; SIMD128-NEXT: i32.and $push20=, $pop18, $pop19
+; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 11
+; SIMD128-NEXT: i32.and $push22=, $pop20, $pop21
+; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 12
+; SIMD128-NEXT: i32.and $push24=, $pop22, $pop23
+; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 13
+; SIMD128-NEXT: i32.and $push26=, $pop24, $pop25
+; SIMD128-NEXT: i8x16.extract_lane_u $push27=, $0, 14
+; SIMD128-NEXT: i32.and $push28=, $pop26, $pop27
+; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 15
+; SIMD128-NEXT: i32.and $push30=, $pop28, $pop29
+; SIMD128-NEXT: return $pop30
%res = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %arg)
ret i8 %res
}
@@ -271,19 +290,38 @@ define i8 @pairwise_or_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_or_v16i8:
; SIMD128: .functype pairwise_or_v16i8 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.or $push11=, $0, $pop0
-; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
-; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.or $push9=, $pop10, $pop1
-; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
-; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.or $push7=, $pop8, $pop2
-; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
-; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: v128.or $push4=, $pop6, $pop3
-; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
-; SIMD128-NEXT: return $pop5
+; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 0
+; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $0, 1
+; SIMD128-NEXT: i32.or $push2=, $pop1, $pop0
+; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $0, 2
+; SIMD128-NEXT: i32.or $push4=, $pop2, $pop3
+; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $0, 3
+; SIMD128-NEXT: i32.or $push6=, $pop4, $pop5
+; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $0, 4
+; SIMD128-NEXT: i32.or $push8=, $pop6, $pop7
+; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 5
+; SIMD128-NEXT: i32.or $push10=, $pop8, $pop9
+; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 6
+; SIMD128-NEXT: i32.or $push12=, $pop10, $pop11
+; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 7
+; SIMD128-NEXT: i32.or $push14=, $pop12, $pop13
+; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $0, 8
+; SIMD128-NEXT: i32.or $push16=, $pop14, $pop15
+; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 9
+; SIMD128-NEXT: i32.or $push18=, $pop16, $pop17
+; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 10
+; SIMD128-NEXT: i32.or $push20=, $pop18, $pop19
+; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 11
+; SIMD128-NEXT: i32.or $push22=, $pop20, $pop21
+; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 12
+; SIMD128-NEXT: i32.or $push24=, $pop22, $pop23
+; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 13
+; SIMD128-NEXT: i32.or $push26=, $pop24, $pop25
+; SIMD128-NEXT: i8x16.extract_lane_u $push27=, $0, 14
+; SIMD128-NEXT: i32.or $push28=, $pop26, $pop27
+; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 15
+; SIMD128-NEXT: i32.or $push30=, $pop28, $pop29
+; SIMD128-NEXT: return $pop30
%res = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %arg)
ret i8 %res
}
>From 339b6da2509e8110298537ad98e344cb2d1dd35b Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sat, 7 Feb 2026 14:25:14 +0100
Subject: [PATCH 8/9] remove dead code
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 36 -------------------
1 file changed, 36 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 7b673fb5275fa..eb592e32f25c2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2729,42 +2729,6 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}
-static SDValue emitShuffleReduceTree(SelectionDAG &DAG, const SDLoc &DL,
- SDValue Vec, unsigned BaseOpc) {
- EVT VecVT = Vec.getValueType();
- assert(VecVT.isVector() && "expected vector");
-
- auto foldInHalf = [&](ArrayRef<int> Mask) -> void {
- SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Vec, Vec, Mask);
- Vec = DAG.getNode(BaseOpc, DL, VecVT, Vec, Shuf);
- };
-
- if (VecVT == MVT::v16i8) {
- foldInHalf({8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0});
- foldInHalf({4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
- foldInHalf({2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
- foldInHalf({1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
- return Vec;
- }
- if (VecVT == MVT::v8i16) {
- foldInHalf({4, 5, 6, 7, 0, 0, 0, 0});
- foldInHalf({2, 3, 0, 0, 0, 0, 0, 0});
- foldInHalf({1, 0, 0, 0, 0, 0, 0, 0});
- return Vec;
- }
- if (VecVT == MVT::v4i32) {
- foldInHalf({2, 3, 0, 0});
- foldInHalf({1, 0, 0, 0});
- return Vec;
- }
- if (VecVT == MVT::v2i64) {
- foldInHalf({1, 0});
- return Vec;
- }
-
- return SDValue();
-}
-
SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
>From 75f87e54c465d5ed23cef5f33e1ad12150678f48 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sat, 7 Feb 2026 17:47:42 +0100
Subject: [PATCH 9/9] remove custom lowering of vecreduce
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 15 ++-------------
.../Target/WebAssembly/WebAssemblyISelLowering.h | 1 -
2 files changed, 2 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index eb592e32f25c2..da674a7657b64 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -244,20 +244,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// We have custom shuffle lowering to expose the shuffle mask
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
- MVT::v2f64}) {
+ MVT::v2f64})
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
- setOperationAction(ISD::VECREDUCE_OR, T, Custom);
- setOperationAction(ISD::VECREDUCE_AND, T, Custom);
- }
-
- if (Subtarget->hasFP16()) {
+ if (Subtarget->hasFP16())
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);
- setOperationAction(ISD::VECREDUCE_OR, MVT::v8f16, Custom);
- setOperationAction(ISD::VECREDUCE_AND, MVT::v8f16, Custom);
- }
-
// Support splatting
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
@@ -1747,9 +1739,6 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_AND:
- return expandVecReduce(Op.getNode(), DAG);
case ISD::SETCC:
return LowerSETCC(Op, DAG);
case ISD::SHL:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 9e6c4dcb07d1c..204384f06ab25 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -115,7 +115,6 @@ class WebAssemblyTargetLowering final : public TargetLowering {
SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;