[llvm] 6f5a805 - [DAG] Fold i1/vXi1 saddsat/uaddsat(x,y) -> or(x,y)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 13 07:02:57 PST 2021
Author: Simon Pilgrim
Date: 2021-02-13T15:02:01Z
New Revision: 6f5a805bbbed5d0cdaaf67846dffa7f044afb407
URL: https://github.com/llvm/llvm-project/commit/6f5a805bbbed5d0cdaaf67846dffa7f044afb407
DIFF: https://github.com/llvm/llvm-project/commit/6f5a805bbbed5d0cdaaf67846dffa7f044afb407.diff
LOG: [DAG] Fold i1/vXi1 saddsat/uaddsat(x,y) -> or(x,y)
Alive2: https://alive2.llvm.org/ce/z/FzcrpH
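
For reference, a minimal standalone C++ sketch (an illustration added here, not part of the patch) that exhaustively checks the equivalence the Alive2 proof covers: for 1-bit lanes, both signed and unsigned saturating addition agree with bitwise OR.

// Hypothetical standalone sketch (not part of the patch): exhaustively
// checks that for 1-bit lanes both saturating adds agree with bitwise OR,
// mirroring what the Alive2 link proves.
#include <algorithm>
#include <cassert>

// Unsigned i1 lanes hold {0, 1}; the sum clamps to [0, 1].
static int UAddSat1(int X, int Y) { return std::min(X + Y, 1); }

// Signed i1 lanes hold {0, -1}; the sum clamps to [-1, 0].
static int SAddSat1(int X, int Y) { return std::clamp(X + Y, -1, 0); }

int main() {
  for (int X = 0; X <= 1; ++X)
    for (int Y = 0; Y <= 1; ++Y) {
      assert(UAddSat1(X, Y) == (X | Y));
      // The signed i1 encoding of bit 1 is -1 (all ones).
      int SX = -X, SY = -Y;
      assert(SAddSat1(SX, SY) == (SX | SY));
    }
  return 0;
}

Both asserts hold for all four input combinations, which is why the single MVT::i1 scalar-type check added to visitADDSAT below covers ISD::SADDSAT and ISD::UADDSAT with one fold.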
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/sadd_sat_vec.ll
llvm/test/CodeGen/X86/uadd_sat_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42988688111e..31d78a6fd679 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2540,6 +2540,10 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
if (isNullConstant(N1))
return N0;
+ // fold (add_sat x, y) -> (or x, y) for bool types.
+ if (VT.getScalarType() == MVT::i1)
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
// If it cannot overflow, transform into an add.
if (Opcode == ISD::UADDSAT)
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 1ae1ee43beee..2f3cae1aa07b 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -286,14 +286,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-NEXT: sshr v1.16b, v1.16b, #7
-; CHECK-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 5f92f713573d..43a32b43b585 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -285,13 +285,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.16b, #1
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ushr v0.16b, v0.16b, #7
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 684bebaa85dd..9b81809962aa 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -5472,48 +5472,17 @@ define i1 @test_v1i1_mul(i1 %x, i1 %y) {
}
define <1 x i1> @uadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
-; KNL-LABEL: uadd_sat_v1i1:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: korw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $al killed $al killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: uadd_sat_v1i1:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: korw %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $al killed $al killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: uadd_sat_v1i1:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: korw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: uadd_sat_v1i1:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: korw %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: uadd_sat_v1i1:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: uadd_sat_v1i1:
; X86: ## %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: korw %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: retl
%z = call <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
@@ -5572,48 +5541,17 @@ define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
declare <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
define <1 x i1> @sadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
-; KNL-LABEL: sadd_sat_v1i1:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: korw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $al killed $al killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: sadd_sat_v1i1:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: korw %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $al killed $al killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: sadd_sat_v1i1:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: korw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: sadd_sat_v1i1:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: korw %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: sadd_sat_v1i1:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: sadd_sat_v1i1:
; X86: ## %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: korw %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: retl
%z = call <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index d5fdfe9b4aed..9eacc459b3f2 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -579,63 +579,13 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; SSE-LABEL: v16i1:
; SSE: # %bb.0:
-; SSE-NEXT: psllw $7, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: psllw $7, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: paddsb %xmm1, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpgtb %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: v16i1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v16i1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v16i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v16i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovb2m %xmm1, %k0
-; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
-; AVX512BW-NEXT: korw %k0, %k1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
-; AVX512BW-NEXT: retq
+; AVX-LABEL: v16i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
}
diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
index 633238f0b1ed..5624dfd80844 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -509,62 +509,13 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; SSE-LABEL: v16i1:
; SSE: # %bb.0:
-; SSE-NEXT: psllw $7, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: psllw $7, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: paddusb %xmm1, %xmm0
-; SSE-NEXT: psrlw $7, %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: v16i1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v16i1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v16i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v16i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovb2m %xmm1, %k0
-; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
-; AVX512BW-NEXT: korw %k0, %k1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
-; AVX512BW-NEXT: retq
+; AVX-LABEL: v16i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
}