[llvm] [WebAssembly] combine `bitmask` with `setcc <X>, 0, setlt` (PR #179065)
Folkert de Vries via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 31 12:48:30 PST 2026
https://github.com/folkertdev created https://github.com/llvm/llvm-project/pull/179065
The Rust `simd_bitmask` intrinsic is UB when the lanes of its input are not either `0` or `!0`, presumably so that an implementation is free to inspect any single bit of each lane. To get the "mask of MSBs" behavior of WebAssembly's `bitmask`, we would like to simply compare with a zero vector first.
```llvm
define i32 @example(<16 x i8> noundef %v) {
entry:
%1 = icmp slt <16 x i8> %v, zeroinitializer
%2 = bitcast <16 x i1> %1 to i16
%3 = zext i16 %2 to i32
ret i32 %3
}
```
On x86_64, this additional comparison optimizes away, but for wasm it does not.
https://godbolt.org/z/T5sPejocs
This PR adds a new combine, so that instead of emitting
```asm
example:
local.get 0
v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
i8x16.lt_s
i8x16.bitmask
end_function
```
we just emit
```asm
example:
local.get 0
i8x16.bitmask
end_function
```
From 4c2e6905107ea3ad6387f8771bd383e31a46d972 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sat, 31 Jan 2026 21:40:05 +0100
Subject: [PATCH] [WebAssembly] combine `bitmask` with `setcc <X>, 0, setlt`
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 31 ++++++++++-
llvm/test/CodeGen/WebAssembly/simd-bitmask.ll | 52 +++++++++++++++++++
2 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 5abf0e8f59d2a..f9119f1425b9e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3315,6 +3315,32 @@ static SDValue performBitcastCombine(SDNode *N,
return SDValue();
}
+static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG) {
+ // bitmask (setcc <X>, 0, setlt) => bitmask X
+ assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
+ using namespace llvm::SDPatternMatch;
+
+ SDValue LHS;
+ if (N->getNumOperands() < 2 ||
+ N->getConstantOperandVal(0) != Intrinsic::wasm_bitmask ||
+ !sd_match(N->getOperand(1),
+ m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
+ return SDValue();
+
+ EVT LT = LHS.getValueType();
+ if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
+ return SDValue();
+
+ if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
+ m_SpecificCondCode(ISD::SETLT))))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+ {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), LHS});
+}
+
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
// any_true (setcc <X>, 0, eq) => (not (all_true X))
// all_true (setcc <X>, 0, eq) => (not (any_true X))
@@ -3730,8 +3756,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
return performConvertFPCombine(N, DCI.DAG);
case ISD::TRUNCATE:
return performTruncateCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN: {
+ if (SDValue V = performBitmaskCombine(N, DCI.DAG))
+ return V;
return performAnyAllCombine(N, DCI.DAG);
+ }
case ISD::MUL:
return performMulCombine(N, DCI);
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitmask.ll b/llvm/test/CodeGen/WebAssembly/simd-bitmask.ll
index c2a43a839b1e5..8c0a360e26f0c 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-bitmask.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitmask.ll
@@ -196,3 +196,55 @@ define i32 @bitmask_v32i8(<32 x i8> %v) {
%bitmask = bitcast <32 x i1> %cmp to i32
ret i32 %bitmask
}
+
+define i32 @manual_bitmask_i8x16(<16 x i8> %v) {
+; CHECK-LABEL: manual_bitmask_i8x16:
+; CHECK: .functype manual_bitmask_i8x16 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.bitmask
+; CHECK-NEXT: # fallthrough-return
+ %1 = icmp slt <16 x i8> %v, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ %3 = zext i16 %2 to i32
+ ret i32 %3
+}
+
+define i32 @manual_bitmask_i16x8(<8 x i16> %v) {
+; CHECK-LABEL: manual_bitmask_i16x8:
+; CHECK: .functype manual_bitmask_i16x8 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.bitmask
+; CHECK-NEXT: # fallthrough-return
+ %1 = icmp slt <8 x i16> %v, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ %3 = zext i8 %2 to i32
+ ret i32 %3
+}
+
+define i32 @manual_bitmask_i32x4(<4 x i32> %v) {
+; CHECK-LABEL: manual_bitmask_i32x4:
+; CHECK: .functype manual_bitmask_i32x4 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.bitmask
+; CHECK-NEXT: # fallthrough-return
+ %1 = icmp slt <4 x i32> %v, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = zext i4 %2 to i32
+ ret i32 %3
+}
+
+define i32 @manual_bitmask_i64x2(<2 x i64> %v) {
+; CHECK-LABEL: manual_bitmask_i64x2:
+; CHECK: .functype manual_bitmask_i64x2 (v128) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64x2.bitmask
+; CHECK-NEXT: # fallthrough-return
+ %1 = icmp slt <2 x i64> %v, zeroinitializer
+ %2 = bitcast <2 x i1> %1 to i2
+ %3 = zext i2 %2 to i32
+ ret i32 %3
+}
More information about the llvm-commits
mailing list