[llvm] [DAGCombine] Fold vselect with splat zero (PR #147305)

Wed Jul 9 20:50:05 PDT 2025

https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/147305

>From a9d15d18b3358a4dfd06196f39afd74140107c2a Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Mon, 7 Jul 2025 05:49:48 -0700
Subject: [PATCH 1/2] [DAGCombine] Precommit test for isConstantSplatVectorAll

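Add precommit tests for vselect with a splat-zero operand (as either the
true or the false value), ahead of switching the VSelect combine to
ISD::isConstantSplatVectorAllZeros/isConstantSplatVectorAllOnes.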
---
 .../CodeGen/WebAssembly/simd-bitselect.ll     | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-bitselect.ll

diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
new file mode 100644
index 0000000000000..6f38e29d2cebc
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s  -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+target triple = "wasm32-unknown-unknown"
+
+define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32>  %input) {
+; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
+; CHECK:         .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %start
+; CHECK-NEXT:    v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push4=, $1=, $pop5
+; CHECK-NEXT:    v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT:    v128.and $push1=, $0, $pop0
+; CHECK-NEXT:    i32x4.eq $push2=, $1, $pop1
+; CHECK-NEXT:    v128.bitselect $push3=, $pop4, $0, $pop2
+; CHECK-NEXT:    return $pop3
+start:
+  %0 = and <4 x i32> %input, splat (i32 2139095040)
+  %1 = icmp eq <4 x i32> %0, zeroinitializer
+  %2 = select <4 x i1> %1, <4 x i32> zeroinitializer, <4 x i32> %input
+  ret <4 x i32> %2
+}
+
+
+define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32>  %input) {
+; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
+; CHECK:         .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %start
+; CHECK-NEXT:    v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push4=, $1=, $pop5
+; CHECK-NEXT:    v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT:    v128.and $push1=, $0, $pop0
+; CHECK-NEXT:    i32x4.eq $push2=, $1, $pop1
+; CHECK-NEXT:    v128.bitselect $push3=, $0, $pop4, $pop2
+; CHECK-NEXT:    return $pop3
+start:
+  %0 = and <4 x i32> %input, splat (i32 2139095040)
+  %1 = icmp eq <4 x i32> %0, zeroinitializer
+  %2 = select  <4 x i1> %1, <4 x i32> %input, <4 x i32> zeroinitializer
+  ret <4 x i32> %2
+}
+
+
+define <4 x i32> @bitselect_splat_first_zero_cond_input(<4 x i1> %cond, <4 x i32>  %input) {
+; CHECK-LABEL: bitselect_splat_first_zero_cond_input:
+; CHECK:         .functype bitselect_splat_first_zero_cond_input (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %start
+; CHECK-NEXT:    v128.const $push3=, 0, 0, 0, 0
+; CHECK-NEXT:    i32.const $push0=, 31
+; CHECK-NEXT:    i32x4.shl $push1=, $0, $pop0
+; CHECK-NEXT:    i32.const $push5=, 31
+; CHECK-NEXT:    i32x4.shr_s $push2=, $pop1, $pop5
+; CHECK-NEXT:    v128.bitselect $push4=, $pop3, $1, $pop2
+; CHECK-NEXT:    return $pop4
+start:
+  %2 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %input
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i32>  %input) {
+; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
+; CHECK:         .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %start
+; CHECK-NEXT:    v128.const $push3=, 0, 0, 0, 0
+; CHECK-NEXT:    i32.const $push0=, 31
+; CHECK-NEXT:    i32x4.shl $push1=, $0, $pop0
+; CHECK-NEXT:    i32.const $push5=, 31
+; CHECK-NEXT:    i32x4.shr_s $push2=, $pop1, $pop5
+; CHECK-NEXT:    v128.bitselect $push4=, $1, $pop3, $pop2
+; CHECK-NEXT:    return $pop4
+start:
+  %2 = select  <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
+  ret <4 x i32> %2
+}
+

>From 1ad59c0bc85fac51a770c7920d30ba09d487233a Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Wed, 9 Jul 2025 11:13:51 -0700
Subject: [PATCH 2/2] [DAGCombine] Use isConstantSplatVectorAll in VSelect

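- In combineVSelectWithAllOnesOrZeros, use
  ISD::isConstantSplatVectorAllZeros/isConstantSplatVectorAllOnes instead of
  ISD::isBuildVectorAllZeros/isBuildVectorAllOnes.
- Update the WebAssembly tests (fpclamptosat_vec.ll, simd-bitselect.ll) to
  reflect the resulting codegen.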
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 +++---
 .../CodeGen/WebAssembly/fpclamptosat_vec.ll   | 12 +++------
 .../CodeGen/WebAssembly/simd-bitselect.ll     | 27 +++++++++----------
 3 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9ffdda28f7899..13882d5599127 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13091,10 +13091,10 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
   EVT CondVT = Cond.getValueType();
   assert(CondVT.isVector() && "Vector select expects a vector selector!");
 
-  bool IsTAllZero = ISD::isBuildVectorAllZeros(TVal.getNode());
-  bool IsTAllOne = ISD::isBuildVectorAllOnes(TVal.getNode());
-  bool IsFAllZero = ISD::isBuildVectorAllZeros(FVal.getNode());
-  bool IsFAllOne = ISD::isBuildVectorAllOnes(FVal.getNode());
+  bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
+  bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
+  bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
+  bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
 
   // no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
   if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 1feb5feb7a9ee..7190e162eb010 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -107,11 +107,9 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    v128.bitselect
 ; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    v128.const 0, 0
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    i64x2.gt_s
-; CHECK-NEXT:    v128.bitselect
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    v128.and
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT:    # fallthrough-return
@@ -1558,11 +1556,9 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    v128.bitselect
 ; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    v128.const 0, 0
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    i64x2.gt_s
-; CHECK-NEXT:    v128.bitselect
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    v128.and
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT:    # fallthrough-return
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
index 6f38e29d2cebc..4e13dc1454820 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -6,13 +6,12 @@ define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32>  %input) {
 ; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
 ; CHECK:         .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
 ; CHECK-NEXT:  # %bb.0: # %start
-; CHECK-NEXT:    v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT:    local.tee $push4=, $1=, $pop5
 ; CHECK-NEXT:    v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
 ; CHECK-NEXT:    v128.and $push1=, $0, $pop0
-; CHECK-NEXT:    i32x4.eq $push2=, $1, $pop1
-; CHECK-NEXT:    v128.bitselect $push3=, $pop4, $0, $pop2
-; CHECK-NEXT:    return $pop3
+; CHECK-NEXT:    v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.ne $push3=, $pop1, $pop2
+; CHECK-NEXT:    v128.and $push4=, $pop3, $0
+; CHECK-NEXT:    return $pop4
 start:
   %0 = and <4 x i32> %input, splat (i32 2139095040)
   %1 = icmp eq <4 x i32> %0, zeroinitializer
@@ -25,13 +24,12 @@ define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32>  %input) {
 ; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
 ; CHECK:         .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
 ; CHECK-NEXT:  # %bb.0: # %start
-; CHECK-NEXT:    v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT:    local.tee $push4=, $1=, $pop5
 ; CHECK-NEXT:    v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
 ; CHECK-NEXT:    v128.and $push1=, $0, $pop0
-; CHECK-NEXT:    i32x4.eq $push2=, $1, $pop1
-; CHECK-NEXT:    v128.bitselect $push3=, $0, $pop4, $pop2
-; CHECK-NEXT:    return $pop3
+; CHECK-NEXT:    v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push3=, $pop1, $pop2
+; CHECK-NEXT:    v128.and $push4=, $pop3, $0
+; CHECK-NEXT:    return $pop4
 start:
   %0 = and <4 x i32> %input, splat (i32 2139095040)
   %1 = icmp eq <4 x i32> %0, zeroinitializer
@@ -60,13 +58,12 @@ define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i3
 ; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
 ; CHECK:         .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
 ; CHECK-NEXT:  # %bb.0: # %start
-; CHECK-NEXT:    v128.const $push3=, 0, 0, 0, 0
 ; CHECK-NEXT:    i32.const $push0=, 31
 ; CHECK-NEXT:    i32x4.shl $push1=, $0, $pop0
-; CHECK-NEXT:    i32.const $push5=, 31
-; CHECK-NEXT:    i32x4.shr_s $push2=, $pop1, $pop5
-; CHECK-NEXT:    v128.bitselect $push4=, $1, $pop3, $pop2
-; CHECK-NEXT:    return $pop4
+; CHECK-NEXT:    i32.const $push4=, 31
+; CHECK-NEXT:    i32x4.shr_s $push2=, $pop1, $pop4
+; CHECK-NEXT:    v128.and $push3=, $pop2, $1
+; CHECK-NEXT:    return $pop3
 start:
   %2 = select  <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
   ret <4 x i32> %2