[llvm] r300772 - [DAG] add splat vector support for 'or' in SimplifyDemandedBits
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 19 15:00:00 PDT 2017
Author: spatel
Date: Wed Apr 19 17:00:00 2017
New Revision: 300772
URL: http://llvm.org/viewvc/llvm-project?rev=300772&view=rev
Log:
[DAG] add splat vector support for 'or' in SimplifyDemandedBits
I've changed one of the tests so that it does not fold away entirely, but we didn't do — and still don't do — the transform that the comment claims we do (and I don't know why we'd want to do that).
Follow-up to:
https://reviews.llvm.org/rL300725
https://reviews.llvm.org/rL300763
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/combine-or.ll
llvm/trunk/test/CodeGen/X86/i64-to-float.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=300772&r1=300771&r2=300772&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Apr 19 17:00:00 2017
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N)
return Load;
// Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/combine-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-or.ll?rev=300772&r1=300771&r2=300772&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-or.ll Wed Apr 19 17:00:00 2017
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <
ret <4 x i32> %or
}
+; TODO: Why would we do this?
; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64>
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
- %1 = and <2 x i64> %a0, <i64 1, i64 1>
+ %1 = and <2 x i64> %a0, <i64 7, i64 7>
%2 = or <2 x i64> %1, <i64 3, i64 3>
ret <2 x i64> %2
}
+; If all masked bits are going to be set, that's a constant fold.
+
define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32:
; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
%2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32>
define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: or_zext_v2i32:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295]
; CHECK-NEXT: retq
%1 = zext <2 x i32> %a0 to <2 x i64>
%2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32
define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: or_zext_v4i16:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; CHECK-NEXT: retq
%1 = zext <4 x i16> %a0 to <4 x i32>
%2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
Modified: llvm/trunk/test/CodeGen/X86/i64-to-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i64-to-float.ll?rev=300772&r1=300771&r2=300772&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i64-to-float.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i64-to-float.ll Wed Apr 19 17:00:00 2017
@@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2
; X64-SSE-NEXT: pandn %xmm0, %xmm2
; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
; X64-SSE-NEXT: por %xmm2, %xmm3
-; X64-SSE-NEXT: movdqa %xmm3, %xmm0
-; X64-SSE-NEXT: pxor %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: por %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm4
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pxor %xmm3, %xmm1
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT: movdqa %xmm1, %xmm2
+; X64-SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm4, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
; X64-SSE-NEXT: pandn %xmm3, %xmm0
-; X64-SSE-NEXT: pand %xmm2, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %rax
; X64-SSE-NEXT: xorps %xmm0, %xmm0
More information about the llvm-commits
mailing list