[llvm] eaa4548 - [X86][SSE] Add PACKSS SimplifyMultipleUseDemandedBits 'sign bit' handling.

Simon Pilgrim via llvm-commits <llvm-commits at lists.llvm.org>
Mon Jan 20 02:49:14 PST 2020


Author: Simon Pilgrim
Date: 2020-01-20T10:48:54Z
New Revision: eaa4548459026707c909884219b5a5ca56678560

URL: https://github.com/llvm/llvm-project/commit/eaa4548459026707c909884219b5a5ca56678560
DIFF: https://github.com/llvm/llvm-project/commit/eaa4548459026707c909884219b5a5ca56678560.diff

LOG: [X86][SSE] Add PACKSS SimplifyMultipleUseDemandedBits 'sign bit' handling.

Attempt to use SimplifyMultipleUseDemandedBits to simplify PACKSS if we're only after the sign bit.
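
For context: signed saturation preserves the sign of each element, so when a consumer (such as the blendv selects in the tests below) only reads the sign bit of each PACKSS result lane, the pack's inputs are themselves only needed for their sign bits. SimplifyMultipleUseDemandedBits can then replace a multi-use operand with a simpler value that has the same sign bits, which is what lets the vpcmpgtq feeding the pack disappear in the test diffs. A minimal standalone C++ model of a single PACKSSDW lane (illustrative only, not part of the patch):

    // Scalar model of one PACKSSDW lane: signed saturation from i32 to i16.
    #include <cassert>
    #include <cstdint>

    static int16_t packss_lane(int32_t x) {
      if (x > INT16_MAX) return INT16_MAX;
      if (x < INT16_MIN) return INT16_MIN;
      return static_cast<int16_t>(x);
    }

    int main() {
      // Saturation never flips the sign, so the sign bit of the packed result
      // always equals the sign bit of the wide input. A consumer that only
      // reads the sign bit therefore only demands the sign bit of the input.
      for (int64_t i = INT32_MIN; i <= INT32_MAX; i += 65537) {
        int32_t x = static_cast<int32_t>(i);
        assert((packss_lane(x) < 0) == (x < 0));
      }
      return 0;
    }

This is just the lane-level invariant; the code below applies it on SDValues by calling SimplifyMultipleUseDemandedBits with a sign-bit-only demanded mask (SignMask) for each operand.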

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
    llvm/test/CodeGen/X86/vec_int_to_fp.ll
    llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e24e22104ce4..fe18f0bc079f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36087,6 +36087,17 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
       if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
                                KnownRHS, TLO, Depth + 1))
         return true;
+
+      // Attempt to avoid multi-use ops if we don't need anything from them.
+      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+          Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1);
+      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+          Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1);
+      if (DemandedOp0 || DemandedOp1) {
+        SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0);
+        SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1));
+      }
     }
     // TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
     break;

diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index f0bd364e02fb..ab1a4c370251 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -1047,14 +1047,13 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
 ; AVX1-64-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-64-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm3
 ; AVX1-64-NEXT:    vorpd %ymm3, %ymm1, %ymm1
-; AVX1-64-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
-; AVX1-64-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX1-64-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX1-64-NEXT:    vmovq %xmm1, %rax
 ; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX1-64-NEXT:    vmovq %xmm3, %rax
-; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
 ; AVX1-64-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-64-NEXT:    vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
 ; AVX1-64-NEXT:    vmovq %xmm1, %rax
 ; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -1062,8 +1061,6 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
 ; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
 ; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
 ; AVX1-64-NEXT:    vaddps %xmm1, %xmm1, %xmm3
-; AVX1-64-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-64-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-64-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-64-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
 ; AVX1-64-NEXT:    vzeroupper

diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index c310e722ba7f..49cbffba8bb6 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -2154,22 +2154,21 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm1
 ; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
 ; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
-; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
-; AVX1-NEXT:    vmovq %xmm2, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm2
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vcvtdq2ps %xmm1, %xmm3
+; AVX1-NEXT:    vmovq %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
 ; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
-; AVX1-NEXT:    vcvtsi2ss %eax, %xmm4, %xmm1
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm1
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
 ; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -2573,14 +2572,13 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm3
 ; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX1-NEXT:    vmovq %xmm3, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -2588,8 +2586,6 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
 ; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
-; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
 ; AVX1-NEXT:    vzeroupper
@@ -4492,16 +4488,15 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; AVX1-NEXT:    vpsrlq $1, %xmm1, %xmm3
 ; AVX1-NEXT:    vpsrlq $1, %xmm2, %xmm4
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT:    vorpd %ymm0, %ymm3, %ymm0
-; AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm1, %xmm3
-; AVX1-NEXT:    vpextrq $1, %xmm3, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
-; AVX1-NEXT:    vmovq %xmm3, %rax
+; AVX1-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm4
+; AVX1-NEXT:    vorpd %ymm4, %ymm3, %ymm3
+; AVX1-NEXT:    vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -4509,8 +4504,6 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm0
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
 ; AVX1-NEXT:    vaddps %xmm0, %xmm0, %xmm3
-; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vblendvps %xmm1, %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
@@ -4944,59 +4937,56 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ;
 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [1,1,1,1]
-; AVX1-NEXT:    vandpd 32(%rdi), %ymm2, %ymm3
-; AVX1-NEXT:    vmovaps (%rdi), %xmm0
+; AVX1-NEXT:    vmovapd (%rdi), %ymm2
+; AVX1-NEXT:    vmovapd 32(%rdi), %ymm3
+; AVX1-NEXT:    vmovapd {{.*#+}} ymm8 = [1,1,1,1]
+; AVX1-NEXT:    vandpd %ymm3, %ymm8, %ymm5
+; AVX1-NEXT:    vmovdqa (%rdi), %xmm9
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm4
-; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm5
-; AVX1-NEXT:    vpsrlq $1, %xmm4, %xmm6
-; AVX1-NEXT:    vpsrlq $1, %xmm5, %xmm7
-; AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm6, %ymm6
-; AVX1-NEXT:    vorpd %ymm3, %ymm6, %ymm3
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm3, %xmm4, %xmm6
-; AVX1-NEXT:    vpextrq $1, %xmm6, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm7
-; AVX1-NEXT:    vmovq %xmm6, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm6
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3]
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm5, %xmm3
-; AVX1-NEXT:    vmovq %xmm3, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm7
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm6 = xmm6[0,1],xmm7[0],xmm6[3]
-; AVX1-NEXT:    vpextrq $1, %xmm3, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm3
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm6[0,1,2],xmm3[0]
-; AVX1-NEXT:    vaddps %xmm3, %xmm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm7, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm7, %xmm5
-; AVX1-NEXT:    vpackssdw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vblendvps %xmm4, %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vandpd (%rdi), %ymm2, %ymm2
-; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm4
+; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm6
+; AVX1-NEXT:    vpsrlq $1, %xmm6, %xmm7
+; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm4
+; AVX1-NEXT:    vpsrlq $1, %xmm4, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm7, %ymm0
+; AVX1-NEXT:    vorpd %ymm5, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm3, %ymm0
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm3
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm5
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm5
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3]
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm0
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
+; AVX1-NEXT:    vaddps %xmm0, %xmm0, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm6, %xmm4
+; AVX1-NEXT:    vblendvps %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandpd %ymm2, %ymm8, %ymm3
+; AVX1-NEXT:    vpsrlq $1, %xmm9, %xmm4
 ; AVX1-NEXT:    vpsrlq $1, %xmm1, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vorpd %ymm2, %ymm4, %ymm2
-; AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm0, %xmm4
-; AVX1-NEXT:    vpextrq $1, %xmm4, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm5
-; AVX1-NEXT:    vmovq %xmm4, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm4
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
+; AVX1-NEXT:    vorpd %ymm3, %ymm4, %ymm3
+; AVX1-NEXT:    vblendvpd %ymm2, %ymm3, %ymm2, %ymm2
+; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm3
+; AVX1-NEXT:    vmovq %xmm2, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm4
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vblendvpd %xmm1, %xmm2, %xmm1, %xmm2
 ; AVX1-NEXT:    vmovq %xmm2, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm5
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3]
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm4
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
 ; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm8, %xmm2
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
-; AVX1-NEXT:    vaddps %xmm2, %xmm2, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm7, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvps %xmm0, %xmm4, %xmm2, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm10, %xmm2
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
+; AVX1-NEXT:    vaddps %xmm2, %xmm2, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm9, %xmm1
+; AVX1-NEXT:    vblendvps %xmm1, %xmm3, %xmm2, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uitofp_load_8i64_to_8f32:

diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 7c35ccebefb7..8ea92294fc03 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -7492,14 +7492,13 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm3
 ; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX1-NEXT:    vmovq %xmm3, %rax
-; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
@@ -7507,8 +7506,6 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
 ; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
-; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
 ; AVX1-NEXT:    vzeroupper


        

