[llvm] r300763 - [DAG] add splat vector support for 'xor' in SimplifyDemandedBits
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 19 14:23:10 PDT 2017
Author: spatel
Date: Wed Apr 19 16:23:09 2017
New Revision: 300763
URL: http://llvm.org/viewvc/llvm-project?rev=300763&view=rev
Log:
[DAG] add splat vector support for 'xor' in SimplifyDemandedBits
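This allows forming more 'not' ops when the xor constant is a splat vector, so we get better codegen for ISAs that have an and-not instruction (e.g. ARM vbic, PowerPC xxlandc, x86 vandnps in the tests below).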
Follow-up to:
https://reviews.llvm.org/rL300725
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/ARM/vbits.ll
llvm/trunk/test/CodeGen/PowerPC/andc.ll
llvm/trunk/test/CodeGen/X86/avx-logic.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/i64-to-float.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Apr 19 16:23:09 2017
@@ -5058,8 +5058,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N)
return Tmp;
// Simplify the expression using non-local knowledge.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Apr 19 16:23:09 2017
@@ -715,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBit
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// If we can't force bits, try to shrink the constant.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt Expanded = C->getAPIntValue() | (~NewMask);
// If we can expand it to have all bits set, do it.
if (Expanded.isAllOnesValue()) {
Modified: llvm/trunk/test/CodeGen/ARM/vbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vbits.ll?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vbits.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vbits.ll Wed Apr 19 16:23:09 2017
@@ -850,11 +850,9 @@ define <4 x i32> @hidden_not_v4i32(<4 x
; CHECK-LABEL: hidden_not_v4i32:
; CHECK: @ BB#0:
; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q8, #0xf
+; CHECK-NEXT: vmov.i32 q8, #0x6
; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vmov.i32 q10, #0x6
-; CHECK-NEXT: veor q8, q9, q8
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vbic q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: bx lr
Modified: llvm/trunk/test/CodeGen/PowerPC/andc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/andc.ll?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/andc.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/andc.ll Wed Apr 19 16:23:09 2017
@@ -43,10 +43,8 @@ define i1 @foo(i32 %i) {
define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
; CHECK-LABEL: hidden_not_v4i32:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 3, 15
-; CHECK-NEXT: vspltisw 4, 6
-; CHECK-NEXT: xxlxor 0, 34, 35
-; CHECK-NEXT: xxland 34, 0, 36
+; CHECK-NEXT: vspltisw 3, 6
+; CHECK-NEXT: xxlandc 34, 35, 34
; CHECK-NEXT: blr
%xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
%and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
Modified: llvm/trunk/test/CodeGen/X86/avx-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-logic.ll?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-logic.ll Wed Apr 19 16:23:09 2017
@@ -274,16 +274,13 @@ entry:
define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
; AVX-LABEL: and_xor_splat1_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: and_xor_splat1_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
@@ -293,16 +290,13 @@ define <4 x i32> @and_xor_splat1_v4i32(<
define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
; AVX-LABEL: and_xor_splat1_v4i64:
; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: and_xor_splat1_v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
%and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Apr 19 16:23:09 2017
@@ -1430,7 +1430,8 @@ define void @store_v1i1(<1 x i1> %c , <1
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL: ## BB#0:
-; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -1447,7 +1448,8 @@ define void @store_v2i1(<2 x i1> %c , <2
;
; AVX512BW-LABEL: store_v2i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
@@ -1457,7 +1459,8 @@ define void @store_v2i1(<2 x i1> %c , <2
;
; AVX512DQ-LABEL: store_v2i1:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
@@ -1471,7 +1474,7 @@ define void @store_v2i1(<2 x i1> %c , <2
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1489,7 +1492,7 @@ define void @store_v4i1(<4 x i1> %c , <4
;
; AVX512BW-LABEL: store_v4i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1500,7 +1503,7 @@ define void @store_v4i1(<4 x i1> %c , <4
;
; AVX512DQ-LABEL: store_v4i1:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
Modified: llvm/trunk/test/CodeGen/X86/i64-to-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i64-to-float.ll?rev=300763&r1=300762&r2=300763&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i64-to-float.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i64-to-float.ll Wed Apr 19 16:23:09 2017
@@ -224,35 +224,33 @@ define <2 x double> @clamp_sitofp_2i64_2
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: pxor %xmm1, %xmm2
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551361,18446744073709551361]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: pxor %xmm3, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm5
-; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm6, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm2
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
+; X64-SSE-NEXT: movdqa %xmm3, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm5, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm2
; X64-SSE-NEXT: pandn %xmm0, %xmm2
-; X64-SSE-NEXT: pand %xmm3, %xmm4
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: pxor %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
+; X64-SSE-NEXT: por %xmm2, %xmm1
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: pandn %xmm4, %xmm0
+; X64-SSE-NEXT: pandn %xmm3, %xmm0
; X64-SSE-NEXT: pand %xmm2, %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %rax
More information about the llvm-commits mailing list