[llvm] [DAGCombine] Fold vselect with splat zero (PR #147305)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 20:50:05 PDT 2025
https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/147305
>From a9d15d18b3358a4dfd06196f39afd74140107c2a Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Mon, 7 Jul 2025 05:49:48 -0700
Subject: [PATCH 1/2] [DAGCombine] Precommit test for isConstantSplatVectorAll
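Precommit tests for vselect with a splat-zero operand, ahead of switching the VSelect combine to isConstantSplatVectorAllZeros/isConstantSplatVectorAllOnes.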
---
.../CodeGen/WebAssembly/simd-bitselect.ll | 74 +++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
new file mode 100644
index 0000000000000..6f38e29d2cebc
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+target triple = "wasm32-unknown-unknown"
+
+define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
+; CHECK: .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT: local.tee $push4=, $1=, $pop5
+; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT: v128.and $push1=, $0, $pop0
+; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
+; CHECK-NEXT: v128.bitselect $push3=, $pop4, $0, $pop2
+; CHECK-NEXT: return $pop3
+start:
+ %0 = and <4 x i32> %input, splat (i32 2139095040)
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = select <4 x i1> %1, <4 x i32> zeroinitializer, <4 x i32> %input
+ ret <4 x i32> %2
+}
+
+
+define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
+; CHECK: .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT: local.tee $push4=, $1=, $pop5
+; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT: v128.and $push1=, $0, $pop0
+; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
+; CHECK-NEXT: v128.bitselect $push3=, $0, $pop4, $pop2
+; CHECK-NEXT: return $pop3
+start:
+ %0 = and <4 x i32> %input, splat (i32 2139095040)
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = select <4 x i1> %1, <4 x i32> %input, <4 x i32> zeroinitializer
+ ret <4 x i32> %2
+}
+
+
+define <4 x i32> @bitselect_splat_first_zero_cond_input(<4 x i1> %cond, <4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_first_zero_cond_input:
+; CHECK: .functype bitselect_splat_first_zero_cond_input (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push3=, 0, 0, 0, 0
+; CHECK-NEXT: i32.const $push0=, 31
+; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
+; CHECK-NEXT: i32.const $push5=, 31
+; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop5
+; CHECK-NEXT: v128.bitselect $push4=, $pop3, $1, $pop2
+; CHECK-NEXT: return $pop4
+start:
+ %2 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %input
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
+; CHECK: .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push3=, 0, 0, 0, 0
+; CHECK-NEXT: i32.const $push0=, 31
+; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
+; CHECK-NEXT: i32.const $push5=, 31
+; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop5
+; CHECK-NEXT: v128.bitselect $push4=, $1, $pop3, $pop2
+; CHECK-NEXT: return $pop4
+start:
+ %2 = select <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
+ ret <4 x i32> %2
+}
+
>From 1ad59c0bc85fac51a770c7920d30ba09d487233a Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Wed, 9 Jul 2025 11:13:51 -0700
Subject: [PATCH 2/2] [DAGCombine] Use isConstantSplatVectorAll in VSelect
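- Use isConstantSplatVectorAllZeros/isConstantSplatVectorAllOnes instead of isBuildVectorAllZeros/isBuildVectorAllOnes in combineVSelectWithAllOnesOrZeros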
- Update tests to reflect this
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++---
.../CodeGen/WebAssembly/fpclamptosat_vec.ll | 12 +++------
.../CodeGen/WebAssembly/simd-bitselect.ll | 27 +++++++++----------
3 files changed, 20 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9ffdda28f7899..13882d5599127 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13091,10 +13091,10 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
EVT CondVT = Cond.getValueType();
assert(CondVT.isVector() && "Vector select expects a vector selector!");
- bool IsTAllZero = ISD::isBuildVectorAllZeros(TVal.getNode());
- bool IsTAllOne = ISD::isBuildVectorAllOnes(TVal.getNode());
- bool IsFAllZero = ISD::isBuildVectorAllZeros(FVal.getNode());
- bool IsFAllOne = ISD::isBuildVectorAllOnes(FVal.getNode());
+ bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
+ bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
+ bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
+ bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
// no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 1feb5feb7a9ee..7190e162eb010 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -107,11 +107,9 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
@@ -1558,11 +1556,9 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
index 6f38e29d2cebc..4e13dc1454820 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -6,13 +6,12 @@ define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32> %input) {
; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
; CHECK: .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT: local.tee $push4=, $1=, $pop5
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT: v128.and $push1=, $0, $pop0
-; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
-; CHECK-NEXT: v128.bitselect $push3=, $pop4, $0, $pop2
-; CHECK-NEXT: return $pop3
+; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.ne $push3=, $pop1, $pop2
+; CHECK-NEXT: v128.and $push4=, $pop3, $0
+; CHECK-NEXT: return $pop4
start:
%0 = and <4 x i32> %input, splat (i32 2139095040)
%1 = icmp eq <4 x i32> %0, zeroinitializer
@@ -25,13 +24,12 @@ define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32> %input) {
; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
; CHECK: .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT: local.tee $push4=, $1=, $pop5
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT: v128.and $push1=, $0, $pop0
-; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
-; CHECK-NEXT: v128.bitselect $push3=, $0, $pop4, $pop2
-; CHECK-NEXT: return $pop3
+; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.eq $push3=, $pop1, $pop2
+; CHECK-NEXT: v128.and $push4=, $pop3, $0
+; CHECK-NEXT: return $pop4
start:
%0 = and <4 x i32> %input, splat (i32 2139095040)
%1 = icmp eq <4 x i32> %0, zeroinitializer
@@ -60,13 +58,12 @@ define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i3
; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
; CHECK: .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push3=, 0, 0, 0, 0
; CHECK-NEXT: i32.const $push0=, 31
; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
-; CHECK-NEXT: i32.const $push5=, 31
-; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop5
-; CHECK-NEXT: v128.bitselect $push4=, $1, $pop3, $pop2
-; CHECK-NEXT: return $pop4
+; CHECK-NEXT: i32.const $push4=, 31
+; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop4
+; CHECK-NEXT: v128.and $push3=, $pop2, $1
+; CHECK-NEXT: return $pop3
start:
%2 = select <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
ret <4 x i32> %2
More information about the llvm-commits
mailing list