[llvm] [WASM] Fold bitselect with splat zero (PR #147305)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 12:48:57 PDT 2025
https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/147305
>From a6245830a2c7c0c6e846c9f10583016b2d8429e5 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Tue, 8 Jul 2025 15:25:02 +0800
Subject: [PATCH 1/4] [DAG] combineVSelectWithAllOnesOrZeros - fold select
Cond, 0, x -> and not(Cond), x
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +++++-
llvm/lib/Target/X86/X86ISelLowering.cpp | 54 -------------------
.../AArch64/sve-fixed-length-shuffles.ll | 29 ++++------
.../test/CodeGen/AArch64/vselect-constants.ll | 6 +--
4 files changed, 26 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e7f1fdf10719a..4f06e45cdd0c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9902,11 +9902,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
- // fold !(x cc y) -> (x !cc y)
+ // fold xor (setcc x y cc) -1 -> setcc x y !cc
+ // Avoid breaking: and (xor (setcc x y cc) -1) z -> andn for vec
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1) &&
- isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
+ !(N->hasOneUse() && TLI.hasAndNot(SDValue(N, 0)) &&
+ N->use_begin()->getUser()->getOpcode() == ISD::AND && VT.isVector())) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
@@ -13165,6 +13168,15 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
return DAG.getBitcast(VT, And);
}
+ // select Cond, 0, x -> and not(Cond), x
+ if (IsTAllZero &&
+ (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
+ SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue And =
+ DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
+ return DAG.getBitcast(VT, And);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fae80f25f71d1..fd617f7062313 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47260,57 +47260,6 @@ static SDValue combineToExtendBoolVectorInReg(
DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
-/// If a vector select has an left operand that is 0, try to simplify the
-/// select to a bitwise logic operation.
-/// TODO: Move to DAGCombiner.combineVSelectWithAllOnesOrZeros, possibly using
-/// TargetLowering::hasAndNot()?
-static SDValue combineVSelectWithLastZeros(SDNode *N, SelectionDAG &DAG,
- const SDLoc &DL,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- SDValue Cond = N->getOperand(0);
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
- EVT VT = LHS.getValueType();
- EVT CondVT = Cond.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- if (N->getOpcode() != ISD::VSELECT)
- return SDValue();
-
- assert(CondVT.isVector() && "Vector select expects a vector selector!");
-
- // To use the condition operand as a bitwise mask, it must have elements that
- // are the same size as the select elements. Ie, the condition operand must
- // have already been promoted from the IR select condition type <N x i1>.
- // Don't check if the types themselves are equal because that excludes
- // vector floating-point selects.
- if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
- return SDValue();
-
- // Cond value must be 'sign splat' to be converted to a logical op.
- if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
- return SDValue();
-
- if (!TLI.isTypeLegal(CondVT))
- return SDValue();
-
- // vselect Cond, 000..., X -> andn Cond, X
- if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
- SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
- SDValue AndN;
- // The canonical form differs for i1 vectors - x86andnp is not used
- if (CondVT.getScalarType() == MVT::i1)
- AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
- CastRHS);
- else
- AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
- return DAG.getBitcast(VT, AndN);
- }
-
- return SDValue();
-}
-
/// If both arms of a vector select are concatenated vectors, split the select,
/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
/// vselect Cond, (concat T0, T1), (concat F0, F1) -->
@@ -48052,9 +48001,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget))
return SDValue();
- if (SDValue V = combineVSelectWithLastZeros(N, DAG, DL, DCI, Subtarget))
- return V;
-
if (SDValue V = combineVSelectToBLENDV(N, DAG, DL, DCI, Subtarget))
return V;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index d916f26f9b26b..c48ee3939bd2e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -30,7 +30,9 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: // %bb.1: // %vector.body
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z4, [x0]
+; CHECK-NEXT: ldr z5, [x0, #2, mul vl]
+; CHECK-NEXT: ldr z6, [x0, #3, mul vl]
; CHECK-NEXT: umov w8, v0.b[8]
; CHECK-NEXT: mov v1.b[1], v0.b[1]
; CHECK-NEXT: fmov s2, w8
@@ -60,31 +62,20 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
-; CHECK-NEXT: and z1.s, z1.s, #0x1
+; CHECK-NEXT: bic z1.d, z4.d, z1.d
; CHECK-NEXT: lsl z2.s, z2.s, #31
+; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z1, [x0]
; CHECK-NEXT: lsl z3.s, z3.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
-; CHECK-NEXT: and z0.s, z0.s, #0x1
+; CHECK-NEXT: bic z0.d, z5.d, z0.d
; CHECK-NEXT: asr z3.s, z3.s, #31
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
-; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
-; CHECK-NEXT: ldr z0, [x0, #2, mul vl]
-; CHECK-NEXT: and z3.s, z3.s, #0x1
-; CHECK-NEXT: str z1, [x0]
-; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: ldr z3, [x0, #3, mul vl]
-; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
-; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT: bic z1.d, z4.d, z2.d
; CHECK-NEXT: str z0, [x0, #2, mul vl]
+; CHECK-NEXT: bic z3.d, z6.d, z3.d
+; CHECK-NEXT: str z1, [x0, #1, mul vl]
; CHECK-NEXT: str z3, [x0, #3, mul vl]
-; CHECK-NEXT: str z2, [x0, #1, mul vl]
; CHECK-NEXT: .LBB1_2: // %exit
; CHECK-NEXT: ret
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll
index fe125c9626ea3..3c1f06e0e4ed1 100644
--- a/llvm/test/CodeGen/AArch64/vselect-constants.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll
@@ -169,11 +169,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_0_or_1_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: cmge v0.4s, v0.4s, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %add
>From 41eac8fc0a63e500f6a4dab5716b8e3f0c7f6aa1 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Tue, 8 Jul 2025 18:24:25 +0800
Subject: [PATCH 2/4] fix: review
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f06e45cdd0c1..94cb529d9899d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9902,14 +9902,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
- // fold xor (setcc x y cc) -1 -> setcc x y !cc
- // Avoid breaking: and (xor (setcc x y cc) -1) z -> andn for vec
+ // fold not (setcc x, y, cc) -> setcc x y !cc
+ // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1) &&
isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
- !(N->hasOneUse() && TLI.hasAndNot(SDValue(N, 0)) &&
- N->use_begin()->getUser()->getOpcode() == ISD::AND && VT.isVector())) {
+ !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
+ N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
>From 2f3a8d0e194cab45f42b6bf6aa5939b60ff8d82b Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Mon, 7 Jul 2025 05:49:48 -0700
Subject: [PATCH 3/4] [DAGCombine] Precommit test for isConstantSplatVectorAll
Precommit test for isConstantSplatVectorAll in VSelect
---
.../CodeGen/WebAssembly/simd-bitselect.ll | 74 +++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
new file mode 100644
index 0000000000000..6f38e29d2cebc
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+target triple = "wasm32-unknown-unknown"
+
+define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
+; CHECK: .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT: local.tee $push4=, $1=, $pop5
+; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT: v128.and $push1=, $0, $pop0
+; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
+; CHECK-NEXT: v128.bitselect $push3=, $pop4, $0, $pop2
+; CHECK-NEXT: return $pop3
+start:
+ %0 = and <4 x i32> %input, splat (i32 2139095040)
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = select <4 x i1> %1, <4 x i32> zeroinitializer, <4 x i32> %input
+ ret <4 x i32> %2
+}
+
+
+define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
+; CHECK: .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
+; CHECK-NEXT: local.tee $push4=, $1=, $pop5
+; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
+; CHECK-NEXT: v128.and $push1=, $0, $pop0
+; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
+; CHECK-NEXT: v128.bitselect $push3=, $0, $pop4, $pop2
+; CHECK-NEXT: return $pop3
+start:
+ %0 = and <4 x i32> %input, splat (i32 2139095040)
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = select <4 x i1> %1, <4 x i32> %input, <4 x i32> zeroinitializer
+ ret <4 x i32> %2
+}
+
+
+define <4 x i32> @bitselect_splat_first_zero_cond_input(<4 x i1> %cond, <4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_first_zero_cond_input:
+; CHECK: .functype bitselect_splat_first_zero_cond_input (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push3=, 0, 0, 0, 0
+; CHECK-NEXT: i32.const $push0=, 31
+; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
+; CHECK-NEXT: i32.const $push5=, 31
+; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop5
+; CHECK-NEXT: v128.bitselect $push4=, $pop3, $1, $pop2
+; CHECK-NEXT: return $pop4
+start:
+ %2 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %input
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i32> %input) {
+; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
+; CHECK: .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: v128.const $push3=, 0, 0, 0, 0
+; CHECK-NEXT: i32.const $push0=, 31
+; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
+; CHECK-NEXT: i32.const $push5=, 31
+; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop5
+; CHECK-NEXT: v128.bitselect $push4=, $1, $pop3, $pop2
+; CHECK-NEXT: return $pop4
+start:
+ %2 = select <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
+ ret <4 x i32> %2
+}
+
>From b3e58eaa7742684ae8f235eb24009d00e611f024 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Wed, 9 Jul 2025 11:13:51 -0700
Subject: [PATCH 4/4] [DAGCombine] Use isConstantSplatVectorAll in VSelect
- Use isConstantSplatVectorAll* in VSelect
- Update tests to reflect this
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++---
.../CodeGen/WebAssembly/fpclamptosat_vec.ll | 12 +++------
.../CodeGen/WebAssembly/simd-bitselect.ll | 27 +++++++++----------
3 files changed, 20 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 94cb529d9899d..eff2925b808ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13091,10 +13091,10 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
EVT CondVT = Cond.getValueType();
assert(CondVT.isVector() && "Vector select expects a vector selector!");
- bool IsTAllZero = ISD::isBuildVectorAllZeros(TVal.getNode());
- bool IsTAllOne = ISD::isBuildVectorAllOnes(TVal.getNode());
- bool IsFAllZero = ISD::isBuildVectorAllZeros(FVal.getNode());
- bool IsFAllOne = ISD::isBuildVectorAllOnes(FVal.getNode());
+ bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
+ bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
+ bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
+ bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
// no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 1feb5feb7a9ee..7190e162eb010 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -107,11 +107,9 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
@@ -1558,11 +1556,9 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
index 6f38e29d2cebc..4e13dc1454820 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitselect.ll
@@ -6,13 +6,12 @@ define <4 x i32> @bitselect_splat_first_zero_and_icmp(<4 x i32> %input) {
; CHECK-LABEL: bitselect_splat_first_zero_and_icmp:
; CHECK: .functype bitselect_splat_first_zero_and_icmp (v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT: local.tee $push4=, $1=, $pop5
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT: v128.and $push1=, $0, $pop0
-; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
-; CHECK-NEXT: v128.bitselect $push3=, $pop4, $0, $pop2
-; CHECK-NEXT: return $pop3
+; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.ne $push3=, $pop1, $pop2
+; CHECK-NEXT: v128.and $push4=, $pop3, $0
+; CHECK-NEXT: return $pop4
start:
%0 = and <4 x i32> %input, splat (i32 2139095040)
%1 = icmp eq <4 x i32> %0, zeroinitializer
@@ -25,13 +24,12 @@ define <4 x i32> @bitselect_splat_second_zero_and_icmp(<4 x i32> %input) {
; CHECK-LABEL: bitselect_splat_second_zero_and_icmp:
; CHECK: .functype bitselect_splat_second_zero_and_icmp (v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push5=, 0, 0, 0, 0
-; CHECK-NEXT: local.tee $push4=, $1=, $pop5
; CHECK-NEXT: v128.const $push0=, 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT: v128.and $push1=, $0, $pop0
-; CHECK-NEXT: i32x4.eq $push2=, $1, $pop1
-; CHECK-NEXT: v128.bitselect $push3=, $0, $pop4, $pop2
-; CHECK-NEXT: return $pop3
+; CHECK-NEXT: v128.const $push2=, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.eq $push3=, $pop1, $pop2
+; CHECK-NEXT: v128.and $push4=, $pop3, $0
+; CHECK-NEXT: return $pop4
start:
%0 = and <4 x i32> %input, splat (i32 2139095040)
%1 = icmp eq <4 x i32> %0, zeroinitializer
@@ -60,13 +58,12 @@ define <4 x i32> @bitselect_splat_second_zero_cond_input(<4 x i1> %cond, <4 x i3
; CHECK-LABEL: bitselect_splat_second_zero_cond_input:
; CHECK: .functype bitselect_splat_second_zero_cond_input (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %start
-; CHECK-NEXT: v128.const $push3=, 0, 0, 0, 0
; CHECK-NEXT: i32.const $push0=, 31
; CHECK-NEXT: i32x4.shl $push1=, $0, $pop0
-; CHECK-NEXT: i32.const $push5=, 31
-; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop5
-; CHECK-NEXT: v128.bitselect $push4=, $1, $pop3, $pop2
-; CHECK-NEXT: return $pop4
+; CHECK-NEXT: i32.const $push4=, 31
+; CHECK-NEXT: i32x4.shr_s $push2=, $pop1, $pop4
+; CHECK-NEXT: v128.and $push3=, $pop2, $1
+; CHECK-NEXT: return $pop3
start:
%2 = select <4 x i1> %cond, <4 x i32> %input, <4 x i32> zeroinitializer
ret <4 x i32> %2
More information about the llvm-commits mailing list