[llvm] eaa4548 - [X86][SSE] Add PACKSS SimplifyMultipleUseDemandedBits 'sign bit' handling.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 02:49:14 PST 2020
Author: Simon Pilgrim
Date: 2020-01-20T10:48:54Z
New Revision: eaa4548459026707c909884219b5a5ca56678560
URL: https://github.com/llvm/llvm-project/commit/eaa4548459026707c909884219b5a5ca56678560
DIFF: https://github.com/llvm/llvm-project/commit/eaa4548459026707c909884219b5a5ca56678560.diff
LOG: [X86][SSE] Add PACKSS SimplifyMultipleUseDemandedBits 'sign bit' handling.
Attempt to use SimplifyMultipleUseDemandedBits to simplify PACKSS if we're only after the sign bit.
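
For illustration only (not part of the commit): a minimal standalone C++ model of why the fold is safe. PACKSS performs signed saturation, which preserves the sign of each lane, so when a consumer (such as blendv) only reads the sign bits of the packed result, a sign-spreading input like a pcmpgt-against-zero can be bypassed and the original value packed directly. The helper names below (packss_lane, sign_of_pack) are invented for this sketch and do not exist in LLVM.

// Scalar model of one PACKSSDW lane and of the sign-bit argument above.
#include <cassert>
#include <cstdint>

// Signed-saturate an i32 to an i16, as one PACKSSDW lane does.
static int16_t packss_lane(int32_t X) {
  if (X > INT16_MAX) return INT16_MAX;
  if (X < INT16_MIN) return INT16_MIN;
  return static_cast<int16_t>(X);
}

// Sign bit of the packed lane.
static bool sign_of_pack(int32_t X) { return packss_lane(X) < 0; }

int main() {
  const int32_t Samples[] = {INT32_MIN, -70000, -1, 0, 1, 32767, 32768, INT32_MAX};
  for (int32_t X : Samples) {
    // What a pcmpgt-against-zero feeding the pack would produce: all-ones if
    // X is negative, all-zeros otherwise.
    int32_t CmpMask = (X < 0) ? -1 : 0;
    // Saturation preserves the sign, so packing the compare mask and packing
    // the original value agree on the sign bit; the compare is redundant when
    // only the sign bit is demanded downstream.
    assert(sign_of_pack(CmpMask) == sign_of_pack(X));
    assert(sign_of_pack(X) == (X < 0));
  }
  return 0;
}

This mirrors the shape of the AVX1 test changes below, where a vxorps + vpcmpgtq pair feeding vpackssdw disappears because the vblendvps consumer only reads the sign bits.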
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e24e22104ce4..fe18f0bc079f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36087,6 +36087,17 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
KnownRHS, TLO, Depth + 1))
return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0);
+ SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1));
+ }
}
// TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
break;
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index f0bd364e02fb..ab1a4c370251 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -1047,14 +1047,13 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX1-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-64-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
; AVX1-64-NEXT: vorpd %ymm3, %ymm1, %ymm1
-; AVX1-64-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
-; AVX1-64-NEXT: vpextrq $1, %xmm3, %rax
+; AVX1-64-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX1-64-NEXT: vmovq %xmm3, %rax
-; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-64-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-64-NEXT: vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -1062,8 +1061,6 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm3
-; AVX1-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-64-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-64-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-64-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index c310e722ba7f..49cbffba8bb6 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -2154,22 +2154,21 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm1
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2
; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
-; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpextrq $1, %xmm2, %rax
+; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
-; AVX1-NEXT: vmovq %xmm2, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm3
+; AVX1-NEXT: vmovq %xmm1, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
-; AVX1-NEXT: vcvtsi2ss %eax, %xmm4, %xmm1
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2573,14 +2572,13 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpextrq $1, %xmm3, %rax
+; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX1-NEXT: vmovq %xmm3, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -2588,8 +2586,6 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
-; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
@@ -4492,16 +4488,15 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm3
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vorpd %ymm0, %ymm3, %ymm0
-; AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm1, %xmm3
-; AVX1-NEXT: vpextrq $1, %xmm3, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
-; AVX1-NEXT: vmovq %xmm3, %rax
+; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm4
+; AVX1-NEXT: vorpd %ymm4, %ymm3, %ymm3
+; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -4509,8 +4504,6 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
; AVX1-NEXT: vaddps %xmm0, %xmm0, %xmm3
-; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
@@ -4944,59 +4937,56 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
;
; AVX1-LABEL: uitofp_load_8i64_to_8f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [1,1,1,1]
-; AVX1-NEXT: vandpd 32(%rdi), %ymm2, %ymm3
-; AVX1-NEXT: vmovaps (%rdi), %xmm0
+; AVX1-NEXT: vmovapd (%rdi), %ymm2
+; AVX1-NEXT: vmovapd 32(%rdi), %ymm3
+; AVX1-NEXT: vmovapd {{.*#+}} ymm8 = [1,1,1,1]
+; AVX1-NEXT: vandpd %ymm3, %ymm8, %ymm5
+; AVX1-NEXT: vmovdqa (%rdi), %xmm9
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT: vmovdqa 32(%rdi), %xmm4
-; AVX1-NEXT: vmovdqa 48(%rdi), %xmm5
-; AVX1-NEXT: vpsrlq $1, %xmm4, %xmm6
-; AVX1-NEXT: vpsrlq $1, %xmm5, %xmm7
-; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm6, %ymm6
-; AVX1-NEXT: vorpd %ymm3, %ymm6, %ymm3
-; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm4, %xmm6
-; AVX1-NEXT: vpextrq $1, %xmm6, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm7
-; AVX1-NEXT: vmovq %xmm6, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm6
-; AVX1-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3]
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vmovq %xmm3, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm7
-; AVX1-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0,1],xmm7[0],xmm6[3]
-; AVX1-NEXT: vpextrq $1, %xmm3, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm6[0,1,2],xmm3[0]
-; AVX1-NEXT: vaddps %xmm3, %xmm3, %xmm6
-; AVX1-NEXT: vxorps %xmm7, %xmm7, %xmm7
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm5
-; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vblendvps %xmm4, %xmm6, %xmm3, %xmm3
-; AVX1-NEXT: vandpd (%rdi), %ymm2, %ymm2
-; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm4
+; AVX1-NEXT: vmovdqa 32(%rdi), %xmm6
+; AVX1-NEXT: vpsrlq $1, %xmm6, %xmm7
+; AVX1-NEXT: vmovdqa 48(%rdi), %xmm4
+; AVX1-NEXT: vpsrlq $1, %xmm4, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm7, %ymm0
+; AVX1-NEXT: vorpd %ymm5, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm3, %ymm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm3
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm5
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm5
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
+; AVX1-NEXT: vaddps %xmm0, %xmm0, %xmm3
+; AVX1-NEXT: vpackssdw %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vblendvps %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandpd %ymm2, %ymm8, %ymm3
+; AVX1-NEXT: vpsrlq $1, %xmm9, %xmm4
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT: vorpd %ymm2, %ymm4, %ymm2
-; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm0, %xmm4
-; AVX1-NEXT: vpextrq $1, %xmm4, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm5
-; AVX1-NEXT: vmovq %xmm4, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm4
-; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
+; AVX1-NEXT: vorpd %ymm3, %ymm4, %ymm3
+; AVX1-NEXT: vblendvpd %ymm2, %ymm3, %ymm2, %ymm2
+; AVX1-NEXT: vpextrq $1, %xmm2, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm3
+; AVX1-NEXT: vmovq %xmm2, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm4
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vmovq %xmm2, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm5
-; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3]
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm4
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm2
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
-; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vblendvps %xmm0, %xmm4, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm2
+; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
+; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm3
+; AVX1-NEXT: vpackssdw %xmm1, %xmm9, %xmm1
+; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm2, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_load_8i64_to_8f32:
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 7c35ccebefb7..8ea92294fc03 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -7492,14 +7492,13 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
-; AVX1-NEXT: vpextrq $1, %xmm3, %rax
+; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX1-NEXT: vmovq %xmm3, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -7507,8 +7506,6 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
-; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper