[llvm] 0df15e5 - [DAG] Fold i1/vXi1 ssubsat/usubsat(x, y) -> and(x, ~y)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 13 05:21:36 PST 2021
Author: Simon Pilgrim
Date: 2021-02-13T13:21:15Z
New Revision: 0df15e5eff8dec82a619c1d27985356a8aa4037e
URL: https://github.com/llvm/llvm-project/commit/0df15e5eff8dec82a619c1d27985356a8aa4037e
DIFF: https://github.com/llvm/llvm-project/commit/0df15e5eff8dec82a619c1d27985356a8aa4037e.diff
LOG: [DAG] Fold i1/vXi1 ssubsat/usubsat(x,y) -> and(x,~y)
Alive2: https://alive2.llvm.org/ce/z/4nkNGh
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
llvm/test/CodeGen/AArch64/usub_sat_vec.ll
llvm/test/CodeGen/X86/ssub_sat_vec.ll
llvm/test/CodeGen/X86/usub_sat_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 19ef12b6ae2d..42988688111e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3560,6 +3560,10 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (isNullConstant(N1))
return N0;
+ // fold (sub_sat x, y) -> (and x, ~y) for bool types.
+ if (VT.getScalarType() == MVT::i1)
+ return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, N1, VT));
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index c5a55f23913a..6ee1f3523f1e 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -287,14 +287,9 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-NEXT: sshr v1.16b, v1.16b, #7
-; CHECK-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: movi v2.16b, #1
+; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index a361314126a1..a0ab8040e8fc 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -275,9 +275,8 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #1
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index 91198d0bf98a..484a8bba8fda 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -575,62 +575,31 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; SSE-LABEL: v16i1:
; SSE: # %bb.0:
-; SSE-NEXT: psllw $7, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: psllw $7, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: psubsb %xmm1, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpgtb %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: xorps {{.*}}(%rip), %xmm1
+; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: v16i1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i1:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovb2m %xmm0, %k0
-; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
-; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
-; AVX512BW-NEXT: kandnw %k0, %k1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT: vpternlogq $96, {{.*}}(%rip), %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index 247f09d04644..263ce19dd9b8 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -501,49 +501,31 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; SSE-LABEL: v16i1:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: psubusb %xmm1, %xmm0
+; SSE-NEXT: xorps {{.*}}(%rip), %xmm1
+; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: v16i1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i1:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovb2m %xmm0, %k0
-; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
-; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
-; AVX512BW-NEXT: kandnw %k0, %k1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT: vpternlogq $96, {{.*}}(%rip), %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
More information about the llvm-commits mailing list