[llvm] 80a328b - [X86] SimplifyDemandedVectorEltsForTargetNode - add basic PCMPEQ/PCMPGT handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 07:23:10 PST 2024
Author: Simon Pilgrim
Date: 2024-02-29T15:22:12Z
New Revision: 80a328b0118faa691137d6325b0eed4199060adc
URL: https://github.com/llvm/llvm-project/commit/80a328b0118faa691137d6325b0eed4199060adc
DIFF: https://github.com/llvm/llvm-project/commit/80a328b0118faa691137d6325b0eed4199060adc.diff
LOG: [X86] SimplifyDemandedVectorEltsForTargetNode - add basic PCMPEQ/PCMPGT handling
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
llvm/test/CodeGen/X86/test-shrink-bug.ll
llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
llvm/test/CodeGen/X86/vector-reduce-umax.ll
llvm/test/CodeGen/X86/vector-reduce-umin.ll
llvm/test/CodeGen/X86/vselect.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d98d914894a3f8..b807a97d6e4851 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41262,6 +41262,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero;
break;
}
+ case X86ISD::PCMPEQ:
+ case X86ISD::PCMPGT: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
case X86ISD::KSHIFTL: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index 5fde9bd5566b40..9946267b48e7ff 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -635,7 +635,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 699dce75e505c7..0bbf94f1817f51 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -581,7 +581,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
index fdb2f41ec0e498..d644ed87c3c108 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
@@ -267,11 +267,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: movl $3, %eax
+; SSE41-NEXT: movq %rax, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pxor %xmm1, %xmm3
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: pextrb $8, %xmm0, %edx
-; SSE41-NEXT: pextrb $0, %xmm2, %ecx
+; SSE41-NEXT: pextrb $0, %xmm3, %ecx
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: # kill: def $dl killed $dl killed $edx
; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -318,7 +320,9 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: movl $3, %eax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index 74c5179b187896..ed43cabbdaee11 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -67,7 +67,7 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
; CHECK-X64-NEXT: je .LBB1_3
; CHECK-X64-NEXT: # %bb.1:
-; CHECK-X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-X64-NEXT: pslld $8, %xmm0
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
; CHECK-X64-NEXT: testb $1, %al
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
index a15de8b8e0f6ad..6a36cd2a86d5cd 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
@@ -264,7 +264,6 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 6d99bedd40b91c..cdeca96732dc31 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -25,13 +25,7 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind {
define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: t1_all_odd_eq:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -82,13 +76,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: t1_all_odd_ne:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -256,7 +244,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; CHECK-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: movabsq $-3074457345618258603, %rax # imm = 0xD555555555555555
+; CHECK-AVX1-NEXT: vmovq %rax, %xmm1
+; CHECK-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
@@ -273,7 +263,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: movabsq $-3074457345618258603, %rax # imm = 0xD555555555555555
+; CHECK-AVX2-NEXT: vmovq %rax, %xmm1
+; CHECK-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index 4799b8e7e5857b..3b25a6e033f2fd 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -210,7 +210,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index 75eeec456c9ac3..2d68cf9d6374d7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -211,7 +211,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index ce3dc8cc873cc7..cc4eb0c8f7343b 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -741,14 +741,24 @@ define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
; SSE-NEXT: shll $15, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: andl $1, %eax
-; AVX-NEXT: shll $15, %eax
-; AVX-NEXT: retq
+; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: shll $15, %eax
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [49,49,49,49]
+; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: shll $15, %eax
+; AVX2-NEXT: retq
0:
%1 = load <8 x i8>, ptr %x
%2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
More information about the llvm-commits
mailing list