[llvm] [DAG] select (sext m), (add X, C), X --> (add X, (and C, (sext m))) (PR #83640)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 3 03:30:05 PST 2024
https://github.com/elhewaty updated https://github.com/llvm/llvm-project/pull/83640
>From 1fe9a56793f4b46e1b454c7b526c98a1cd8e7813 Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Sat, 2 Mar 2024 02:45:48 +0200
Subject: [PATCH 1/2] [DAG][X86] Add tests for folding select m, add(X, C), X
--> add(X, and(C, m)) (NFC)
---
llvm/test/CodeGen/X86/vselect.ll | 44 ++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index cc4eb0c8f7343b..c3823d5da75ac8 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -4,6 +4,50 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; should Fold select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
+define <4 x i32> @masked_select_const(<4 x i32> %a, <4 x i32> %x, <4 x i32> %y) {
+; SSE2-LABEL: masked_select_const:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
+; SSE2-NEXT: paddd %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: masked_select_const:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967272,4294967272,4294967272,4294967272]
+; SSE41-NEXT: paddd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm3
+; SSE41-NEXT: movaps %xmm3, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: masked_select_const:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: masked_select_const:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
+; AVX2-NEXT: vpaddd %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %sub.i = add <4 x i32> %a, <i32 -24, i32 -24, i32 -24, i32 -24>
+ %cmp.i = icmp sgt <4 x i32> %x, %y
+ %sel = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
; Verify that we don't emit packed vector shifts instructions if the
; condition used by the vector select is a vector of constants.
>From 90933d8c682c9dd9c1b99ad2a6515bf60b4c49f6 Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Sat, 2 Mar 2024 18:50:23 +0200
Subject: [PATCH 2/2] [DAG][X86] Fold select (sext m), (add X, C), X --> (add
X, (and C, (sext m)))
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 ++++++
llvm/test/CodeGen/X86/vselect.ll | 37 ++++++-------------
2 files changed, 22 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 33ada3655dc731..32b98aba65a869 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12070,6 +12070,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1);
+ // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+ N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ return DAG.getNode(
+ ISD::ADD, DL, N1.getValueType(), N2,
+ DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
+ }
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index c3823d5da75ac8..9acd995d612c31 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -4,43 +4,28 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; should Fold select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
+; PR66101 - Fold select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
define <4 x i32> @masked_select_const(<4 x i32> %a, <4 x i32> %x, <4 x i32> %y) {
-; SSE2-LABEL: masked_select_const:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
-; SSE2-NEXT: paddd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm1, %xmm3
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: masked_select_const:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967272,4294967272,4294967272,4294967272]
-; SSE41-NEXT: paddd %xmm0, %xmm4
-; SSE41-NEXT: pcmpgtd %xmm2, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm3
-; SSE41-NEXT: movaps %xmm3, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: masked_select_const:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpgtd %xmm2, %xmm1
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX1-LABEL: masked_select_const:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: masked_select_const:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
-; AVX2-NEXT: vpaddd %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%sub.i = add <4 x i32> %a, <i32 -24, i32 -24, i32 -24, i32 -24>
%cmp.i = icmp sgt <4 x i32> %x, %y
More information about the llvm-commits
mailing list