[llvm] [WASM] Fold bitselect with splat zero (PR #147305)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 03:11:53 PDT 2025
https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/147305
>From 4f7a27c953b96ee7733fc41b732b399914d1dd28 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Mon, 7 Jul 2025 05:49:48 -0700
Subject: [PATCH 1/3] [WASM] Precommit test for #73454
---
.../CodeGen/WebAssembly/simd-bitselect.ll | 50 +++++++++++++++++++
1 file changed, 50 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
new file mode 100644
index 0000000000000..3bc447da0c85f
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+target triple = "wasm32-unknown-unknown"
+
+define void @bitselect_first_zero(ptr %output, ptr %input) {
+; CHECK-LABEL: bitselect_first_zero:
+; CHECK: .functype bitselect_first_zero (i32, i32) -> ()
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push7=, 0, 0, 0, 0
+; CHECK-NEXT: local.tee $push6=, $3=, $pop7
+; CHECK-NEXT: v128.load $push5=, 0($1)
+; CHECK-NEXT: local.tee $push4=, $2=, $pop5
+; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT: v128.and $push1=, $2, $pop0
+; CHECK-NEXT: i32x4.eq $push2=, $3, $pop1
+; CHECK-NEXT: v128.bitselect $push3=, $pop6, $pop4, $pop2
+; CHECK-NEXT: v128.store 0($0), $pop3
+; CHECK-NEXT: return
+start:
+ %input.val = load <4 x i32>, ptr %input, align 16
+ %0 = and <4 x i32> %input.val, splat (i32 2139095040)
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = select <4 x i1> %1, <4 x i32> zeroinitializer, <4 x i32> %input.val
+ store <4 x i32> %2, ptr %output, align 16
+ ret void
+}
+
+
+define void @bitselect_second_zero(ptr %output, ptr %input) {
+; CHECK-LABEL: bitselect_second_zero:
+; CHECK: .functype bitselect_second_zero (i32, i32) -> ()
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.load $push7=, 0($1)
+; CHECK-NEXT: local.tee $push6=, $2=, $pop7
+; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT: local.tee $push4=, $3=, $pop5
+; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT: v128.and $push1=, $2, $pop0
+; CHECK-NEXT: i32x4.eq $push2=, $3, $pop1
+; CHECK-NEXT: v128.bitselect $push3=, $pop6, $pop4, $pop2
+; CHECK-NEXT: v128.store 0($0), $pop3
+; CHECK-NEXT: return
+start:
+ %input.val = load <4 x i32>, ptr %input, align 16
+ %0 = and <4 x i32> %input.val, splat (i32 2139095040)
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = select <4 x i1> %1, <4 x i32> %input.val, <4 x i32> zeroinitializer
+ store <4 x i32> %2, ptr %output, align 16
+ ret void
+}
>From b00d0c543c7be82ad936e29ebff45989e2bc461a Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Mon, 7 Jul 2025 06:01:30 -0700
Subject: [PATCH 2/3] [WASM] Fold bitselect with argument <0>
Fixes #73454
Fold a vselect (which is selected as bitselect) when any one of its
three operands is a splat <0>:
- For first argument <0>: vselect <0>, X, Y -> Y
- For second argument <0>: vselect Y, <0>, X -> AND(X, NOT(Y))
- For third argument <0>: vselect X, Y, <0> -> AND(Y, X)
Detailed explanation in the implementation.
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 63 +++++++++++++++++++
.../CodeGen/WebAssembly/simd-bitselect.ll | 26 ++++----
2 files changed, 75 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index bf2e04caa0a61..8009bfcc861c3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -191,6 +191,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Combine vector mask reductions into alltrue/anytrue
setTargetDAGCombine(ISD::SETCC);
+ // Convert vselect of various zero arguments to AND
+ setTargetDAGCombine(ISD::VSELECT);
+
// Convert vector to integer bitcasts to bitmask
setTargetDAGCombine(ISD::BITCAST);
@@ -3210,6 +3213,64 @@ static SDValue performTruncateCombine(SDNode *N,
return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}
+static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+ // In the tablegen.td, vselect A B C -> bitselect B C A
+
+ // SCENARIO A
+ // vselect <0>, X, Y
+ // -> bitselect X, Y, <0>
+ // -> or (AND(X, <0>), AND(<Y>, !<0>))
+ // -> or (0, AND(<Y>, !<0>))
+ // -> AND(Y, !<0>)
+ // -> AND(Y, 1)
+ // -> Y
+
+ // SCENARIO B
+ // vselect Y, <0>, X
+ // -> bitselect <0>, X, Y
+ // -> or (AND(<0>, Y), AND(<X>, !<Y>))
+ // -> or (0, AND(<X>, !<Y>))
+ // -> AND(<X>, !<Y>)
+
+ // SCENARIO C
+ // vselect X, Y, <0>
+ // -> bitselect Y, <0>, X
+ // -> or (AND(Y, X), AND(<0>, !X))
+ // -> or (AND(Y, X), <0>)
+ // -> AND(Y, X)
+
+ using namespace llvm::SDPatternMatch;
+ assert(N->getOpcode() == ISD::VSELECT);
+
+ SDLoc DL(N);
+
+ SDValue Cond = N->getOperand(0), LHS = N->getOperand(1),
+ RHS = N->getOperand(2);
+ EVT NVT = N->getValueType(0);
+
+ APInt SplatValue;
+
+ // SCENARIO A
+ if (ISD::isConstantSplatVector(Cond.getNode(), SplatValue) &&
+ SplatValue.isZero())
+ return RHS;
+
+ // SCENARIO B
+ if (ISD::isConstantSplatVector(LHS.getNode(), SplatValue) &&
+ SplatValue.isZero())
+ return DAG.getNode(
+ ISD::AND, DL, NVT,
+ {RHS, DAG.getSExtOrTrunc(DAG.getNOT(DL, Cond, Cond.getValueType()), DL,
+ NVT)});
+
+ // SCENARIO C
+ if (ISD::isConstantSplatVector(RHS.getNode(), SplatValue) &&
+ SplatValue.isZero())
+ return DAG.getNode(ISD::AND, DL, NVT,
+ {LHS, DAG.getSExtOrTrunc(Cond, DL, NVT)});
+ return SDValue();
+}
+
static SDValue performBitcastCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
using namespace llvm::SDPatternMatch;
@@ -3505,6 +3566,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default:
return SDValue();
+ case ISD::VSELECT:
+ return performVSelectCombine(N, DCI.DAG);
case ISD::BITCAST:
return performBitcastCombine(N, DCI);
case ISD::SETCC:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
index 3bc447da0c85f..7803709dc8b4a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -6,15 +6,14 @@ define void @bitselect_first_zero(ptr %output, ptr %input) {
; CHECK-LABEL: bitselect_first_zero:
; CHECK: .functype bitselect_first_zero (i32, i32) -> ()
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push7=, 0, 0, 0, 0
-; CHECK-NEXT: local.tee $push6=, $3=, $pop7
-; CHECK-NEXT: v128.load $push5=, 0($1)
-; CHECK-NEXT: local.tee $push4=, $2=, $pop5
+; CHECK-NEXT: v128.load $push6=, 0($1)
+; CHECK-NEXT: local.tee $push5=, $2=, $pop6
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT: v128.and $push1=, $2, $pop0
-; CHECK-NEXT: i32x4.eq $push2=, $3, $pop1
-; CHECK-NEXT: v128.bitselect $push3=, $pop6, $pop4, $pop2
-; CHECK-NEXT: v128.store 0($0), $pop3
+; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.ne $push3=, $pop1, $pop2
+; CHECK-NEXT: v128.and $push4=, $pop5, $pop3
+; CHECK-NEXT: v128.store 0($0), $pop4
; CHECK-NEXT: return
start:
%input.val = load <4 x i32>, ptr %input, align 16
@@ -30,15 +29,14 @@ define void @bitselect_second_zero(ptr %output, ptr %input) {
; CHECK-LABEL: bitselect_second_zero:
; CHECK: .functype bitselect_second_zero (i32, i32) -> ()
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.load $push7=, 0($1)
-; CHECK-NEXT: local.tee $push6=, $2=, $pop7
-; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT: local.tee $push4=, $3=, $pop5
+; CHECK-NEXT: v128.load $push6=, 0($1)
+; CHECK-NEXT: local.tee $push5=, $2=, $pop6
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT: v128.and $push1=, $2, $pop0
-; CHECK-NEXT: i32x4.eq $push2=, $3, $pop1
-; CHECK-NEXT: v128.bitselect $push3=, $pop6, $pop4, $pop2
-; CHECK-NEXT: v128.store 0($0), $pop3
+; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.eq $push3=, $pop1, $pop2
+; CHECK-NEXT: v128.and $push4=, $pop5, $pop3
+; CHECK-NEXT: v128.store 0($0), $pop4
; CHECK-NEXT: return
start:
%input.val = load <4 x i32>, ptr %input, align 16
>From ee0da14bcfa51083c3e9e269a26b378a9465c4eb Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Tue, 8 Jul 2025 03:01:25 -0700
Subject: [PATCH 3/3] [WASM] Remove redundant zero folding and fix test
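- Remove the redundant fold of a splat-zero condition (SCENARIO A) from performVSelectCombine().
- Skip the VSELECT combine before legalization, where it degrades codegen, and update the fpclamptosat_vec.ll test expectations accordingly.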
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 34 +++++++------------
.../CodeGen/WebAssembly/fpclamptosat_vec.ll | 12 +++----
2 files changed, 17 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 8009bfcc861c3..1529e198235e3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3213,26 +3213,18 @@ static SDValue performTruncateCombine(SDNode *N,
return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}
-static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performVSelectCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
// In the tablegen.td, vselect A B C -> bitselect B C A
- // SCENARIO A
- // vselect <0>, X, Y
- // -> bitselect X, Y, <0>
- // -> or (AND(X, <0>), AND(<Y>, !<0>))
- // -> or (0, AND(<Y>, !<0>))
- // -> AND(Y, !<0>)
- // -> AND(Y, 1)
- // -> Y
-
- // SCENARIO B
+ // SCENARIO 1
// vselect Y, <0>, X
// -> bitselect <0>, X, Y
// -> or (AND(<0>, Y), AND(<X>, !<Y>))
// -> or (0, AND(<X>, !<Y>))
// -> AND(<X>, !<Y>)
- // SCENARIO C
+ // SCENARIO 2
// vselect X, Y, <0>
// -> bitselect Y, <0>, X
// -> or (AND(Y, X), AND(<0>, !X))
@@ -3242,20 +3234,20 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
using namespace llvm::SDPatternMatch;
assert(N->getOpcode() == ISD::VSELECT);
+ // INFO: There is degradation in performance pre-legalization,
+ // fpclamptosat_vec.ll
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
SDLoc DL(N);
SDValue Cond = N->getOperand(0), LHS = N->getOperand(1),
RHS = N->getOperand(2);
EVT NVT = N->getValueType(0);
-
+ SelectionDAG &DAG = DCI.DAG;
APInt SplatValue;
- // SCENARIO A
- if (ISD::isConstantSplatVector(Cond.getNode(), SplatValue) &&
- SplatValue.isZero())
- return RHS;
-
- // SCENARIO B
+ // SCENARIO 1
if (ISD::isConstantSplatVector(LHS.getNode(), SplatValue) &&
SplatValue.isZero())
return DAG.getNode(
@@ -3263,7 +3255,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
{RHS, DAG.getSExtOrTrunc(DAG.getNOT(DL, Cond, Cond.getValueType()), DL,
NVT)});
- // SCENARIO C
+ // SCENARIO 2
if (ISD::isConstantSplatVector(RHS.getNode(), SplatValue) &&
SplatValue.isZero())
return DAG.getNode(ISD::AND, DL, NVT,
@@ -3567,7 +3559,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
default:
return SDValue();
case ISD::VSELECT:
- return performVSelectCombine(N, DCI.DAG);
+ return performVSelectCombine(N, DCI);
case ISD::BITCAST:
return performBitcastCombine(N, DCI);
case ISD::SETCC:
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 1feb5feb7a9ee..dc110a83f80c4 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -106,12 +106,10 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NEXT: i64x2.lt_s
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.const 0, 0
; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
@@ -1557,12 +1555,10 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: i64x2.lt_s
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.const 0, 0
; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
More information about the llvm-commits
mailing list