[llvm] r360552 - Recommit r358887 "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling"
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun May 12 21:03:36 PDT 2019
Author: ctopper
Date: Sun May 12 21:03:35 2019
New Revision: 360552
URL: http://llvm.org/viewvc/llvm-project?rev=360552&view=rev
Log:
Recommit r358887 "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling"
I've included a new fix in X86RegisterInfo to prevent PR41619 without
reintroducing r359392. We might be able to improve that in the base class
implementation of shouldRewriteCopySrc somehow. But this hopefully enables
forward progress on SimplifyDemandedBits improvements for now.
Original commit message:
This patch adds support for BigBitWidth -> SmallBitWidth bitcasts, splitting the DemandedBits/Elts accordingly.
The AMDGPU backend needed an extra (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) combine to encourage BFE creation, I investigated putting this in DAGComb
but it caused a lot of noise on other targets - some improvements, some regressions.
The X86 changes are all definite wins.
Added:
llvm/trunk/test/CodeGen/X86/pr41619.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
llvm/trunk/lib/Target/X86/X86RegisterInfo.h
llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll
llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll
llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Sun May 12 21:03:35 2019
@@ -1581,12 +1581,36 @@ bool TargetLowering::SimplifyDemandedBit
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
+ } else if ((NumSrcEltBits % BitWidth) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / BitWidth;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * BitWidth;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SrcVT.isVector()) {
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
+ return true;
+ }
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- Known = TLO.DAG.computeKnownBits(Op, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Sun May 12 21:03:35 2019
@@ -216,6 +216,21 @@ X86RegisterInfo::getPointerRegClass(cons
}
}
+bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // Prevent rewriting a copy where the destination size is larger than the
+ // input size. See PR41619.
+ // FIXME: Should this be factored into the base implementation somehow.
+ if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
+ SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
+ return false;
+
+ return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
+ SrcRC, SrcSubReg);
+}
+
const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Sun May 12 21:03:35 2019
@@ -74,6 +74,11 @@ public:
getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const override;
+ bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const override;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
const TargetRegisterClass *
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll Sun May 12 21:03:35 2019
@@ -448,22 +448,6 @@ define void @bitcast_8i32_store(i8* %p,
define void @bitcast_4i64_store(i4* %p, <4 x i64> %a0) {
; SSE2-SSSE3-LABEL: bitcast_4i64_store:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
-; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: movb %al, (%rdi)
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll Sun May 12 21:03:35 2019
@@ -609,15 +609,13 @@ define void @bitcast_8i64_store(i8* %p,
;
; AVX1-LABEL: bitcast_8i64_store:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: movb %al, (%rdi)
Modified: llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll Sun May 12 21:03:35 2019
@@ -208,22 +208,6 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i
define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
-; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
@@ -532,15 +516,13 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i6
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: movl %eax, %ecx
Modified: llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll Sun May 12 21:03:35 2019
@@ -14,18 +14,11 @@ define i32 @t(i8* %ref_frame_ptr, i32 %r
;
; X64-LABEL: t:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: ## kill: def $edx killed $edx def $rdx
-; X64-NEXT: ## kill: def $esi killed $esi def $rsi
; X64-NEXT: imull %ecx, %esi
-; X64-NEXT: leal (%rsi,%rdx), %eax
-; X64-NEXT: cltq
+; X64-NEXT: addl %edx, %esi
+; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movl (%rdi,%rax), %eax
-; X64-NEXT: leal 4(%rsi,%rdx), %ecx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: movzwl (%rdi,%rcx), %ecx
-; X64-NEXT: shlq $32, %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: movq %rcx, %xmm0
+; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll?rev=360552&r1=360551&r2=360552&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll Sun May 12 21:03:35 2019
@@ -929,22 +929,6 @@ define i1 @allzeros_v16i32_sign(<16 x i3
define i1 @allones_v4i64_sign(<4 x i64> %arg) {
; SSE2-LABEL: allones_v4i64_sign:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: cmpb $15, %al
@@ -989,22 +973,6 @@ define i1 @allones_v4i64_sign(<4 x i64>
define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
; SSE2-LABEL: allzeros_v4i64_sign:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: testb %al, %al
@@ -1095,15 +1063,13 @@ define i1 @allones_v8i64_sign(<8 x i64>
;
; AVX1-LABEL: allones_v8i64_sign:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: cmpb $-1, %al
@@ -1198,15 +1164,13 @@ define i1 @allzeros_v8i64_sign(<8 x i64>
;
; AVX1-LABEL: allzeros_v8i64_sign:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
@@ -2539,19 +2503,17 @@ define i1 @allones_v8i64_and1(<8 x i64>
;
; AVX1-LABEL: allones_v8i64_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: cmpb $-1, %al
@@ -2615,19 +2577,17 @@ define i1 @allzeros_v8i64_and1(<8 x i64>
;
; AVX1-LABEL: allzeros_v8i64_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
@@ -3962,19 +3922,17 @@ define i1 @allones_v8i64_and4(<8 x i64>
;
; AVX1-LABEL: allones_v8i64_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: cmpb $-1, %al
@@ -4038,19 +3996,17 @@ define i1 @allzeros_v8i64_and4(<8 x i64>
;
; AVX1-LABEL: allzeros_v8i64_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
@@ -4170,22 +4126,6 @@ define i32 @movmskps(<4 x float> %x) {
define i32 @movmskpd256(<4 x double> %x) {
; SSE2-LABEL: movmskpd256:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: retq
Added: llvm/trunk/test/CodeGen/X86/pr41619.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr41619.ll?rev=360552&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr41619.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr41619.ll Sun May 12 21:03:35 2019
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.14.0 -mattr=avx2 | FileCheck %s
+
+define void @foo(double %arg) {
+; CHECK-LABEL: foo:
+; CHECK: ## %bb.0: ## %bb
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: movl %eax, (%rax)
+; CHECK-NEXT: vmovlps %xmm1, (%rax)
+; CHECK-NEXT: retq
+bb:
+ %tmp = bitcast double %arg to i64
+ %tmp1 = trunc i64 %tmp to i32
+ %tmp2 = bitcast i32 %tmp1 to float
+ %tmp3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 2
+ %tmp4 = bitcast <4 x float> %tmp3 to <2 x double>
+ %tmp5 = extractelement <2 x double> %tmp4, i32 0
+ %tmp6 = extractelement <2 x double> %tmp4, i32 1
+ %tmp7 = bitcast double %tmp6 to i64
+ %tmp8 = trunc i64 %tmp7 to i32
+ store i32 %tmp8, i32* undef, align 4
+ store double %tmp5, double* undef, align 16
+ ret void
+}
More information about the llvm-commits
mailing list