[llvm] select m, sub/add(X, C), X --> sub/add (X, and(C, m)) (PR #82441)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 20 15:47:16 PST 2024
https://github.com/elhewaty created https://github.com/llvm/llvm-project/pull/82441
- [DAG] Add tests for folding select m, sub(X, C), X --> sub (X, and(C, m)) (NFC)
- [DAG][X86] Fold select m, sub/add(X, C), X --> sub/add (X, and(C, m))
- Fixes: https://github.com/llvm/llvm-project/issues/66101
>From 2662cff2c70469f73f6cfabd54b8ee9cb2543f62 Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Wed, 7 Feb 2024 02:18:26 +0200
Subject: [PATCH 1/2] [DAG] Add tests for Folding select m, sub(X, C), X -->
sub (X, and(C, m))(NFC)
---
llvm/test/CodeGen/X86/vselect.ll | 47 ++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index ce3dc8cc873cc7..c688f56a4949ea 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -7,6 +7,53 @@
; Verify that we don't emit packed vector shifts instructions if the
; condition used by the vector select is a vector of constants.
+define <2 x i64> @masked_select_const(<2 x i64> %a, <2 x i64> %x, <2 x i64> %y) {
+; SSE2-LABEL: masked_select_const:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
+; SSE2-NEXT: paddd %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: masked_select_const:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4294967272,4294967272,4294967272,4294967272]
+; SSE41-NEXT: paddd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm3
+; SSE41-NEXT: movaps %xmm3, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: masked_select_const:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: masked_select_const:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
+; AVX2-NEXT: vpaddd %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %bit_a = bitcast <2 x i64> %a to <4 x i32>
+ %sub.i = add <4 x i32> %bit_a, <i32 -24, i32 -24, i32 -24, i32 -24>
+ %bit_x = bitcast <2 x i64> %x to <4 x i32>
+ %bit_y = bitcast <2 x i64> %y to <4 x i32>
+ %cmp.i = icmp sgt <4 x i32> %bit_x, %bit_y
+ %sel = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %bit_a
+ %bit_sel = bitcast <4 x i32> %sel to <2 x i64>
+ ret <2 x i64> %bit_sel
+}
+
define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
>From 49213f9360afe63a272165d738883cf3f8fe1c67 Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Wed, 21 Feb 2024 01:43:33 +0200
Subject: [PATCH 2/2] [DAG][X86] Fold select m, sub/add(X, C), X --> sub/add
(X, and(C, m))
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++++++++++
llvm/test/CodeGen/X86/vselect.ll | 2 +-
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a09e44e192979..42b24efc81bc5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11660,6 +11660,24 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // select m, sub(X, C), X --> sub (X, and(C, m))
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(0) == N2 && N1->hasOneUse() &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+ N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits()) {
+ return DAG.getNode(ISD::SUB, DL, N1.getValueType(), N2,
+ DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1),
+ N0));
+ }
+
+ // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+ N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits()) {
+ return DAG.getNode(ISD::ADD, DL, N1.getValueType(), N2,
+ DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1),
+ N0));
+ }
+
// Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index c688f56a4949ea..962a72f2daa5d1 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -22,7 +22,7 @@ define <2 x i64> @masked_select_const(<2 x i64> %a, <2 x i64> %x, <2 x i64> %y)
; SSE41-LABEL: masked_select_const:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4294967272,4294967272,4294967272,4294967272]
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967272,4294967272,4294967272,4294967272]
; SSE41-NEXT: paddd %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
More information about the llvm-commits mailing list