[llvm] r318468 - [X86] Add DAG combine to remove sext i32->i64 from gather/scatter instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 15:09:06 PST 2017
Author: ctopper
Date: Thu Nov 16 15:09:06 2017
New Revision: 318468
URL: http://llvm.org/viewvc/llvm-project?rev=318468&view=rev
Log:
[X86] Add DAG combine to remove sext i32->i64 from gather/scatter instructions.
Only do this pre-legalize, since we may be relying on the sign extend to legalize the index type for KNL.
This recovers all of the tests that changed when I stopped SelectionDAGBuilder from deleting sign extends.
There's more work that could be done here, particularly to fix the i8->i64 test case that gets split.
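For reference, the shape of the pattern this combine removes, shown as a minimal IR sketch modeled on the test12 case below (the masked.gather intrinsic mangling and signature are approximate and vary between LLVM versions; the function name is illustrative):

declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)

define <16 x float> @gather_sext_index(float* %base, <16 x i32> %ind, i16 %mask) {
  ; The i32->i64 sign extend on the index is what the combine strips from the
  ; gather node, letting instruction selection pick a single dword-indexed
  ; vgatherdps instead of splitting into two qword-indexed vgatherqps.
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep = getelementptr float, float* %base, <16 x i64> %sext_ind
  %imask = bitcast i16 %mask to <16 x i1>
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep, i32 4, <16 x i1> %imask, <16 x float> undef)
  ret <16 x float> %res
}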
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318468&r1=318467&r2=318468&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Nov 16 15:09:06 2017
@@ -35836,7 +35836,7 @@ static SDValue combineGatherScatter(SDNo
   // Pre-shrink oversized index elements to avoid triggering scalarization.
   if (DCI.isBeforeLegalize()) {
     SDValue Index = N->getOperand(4);
-    if (Index.getValueType().getScalarSizeInBits() > 64) {
+    if (Index.getScalarValueSizeInBits() > 64) {
       EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), MVT::i64,
                                      Index.getValueType().getVectorNumElements());
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
@@ -35846,6 +35846,27 @@ static SDValue combineGatherScatter(SDNo
       DCI.AddToWorklist(N);
       return SDValue(N, 0);
     }
+  }
+
+  // Try to remove sign extends from i32 to i64 on the index.
+  // Only do this before legalize in case we are relying on it for
+  // legalization.
+  // TODO: We should maybe remove any sign extend once we learn how to sign
+  // extend a narrow index during lowering.
+  if (DCI.isBeforeLegalizeOps()) {
+    SDValue Index = N->getOperand(4);
+    if (Index.getScalarValueSizeInBits() == 64 &&
+        Index.getOpcode() == ISD::SIGN_EXTEND &&
+        Index.getOperand(0).getScalarValueSizeInBits() == 32) {
+      SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+      NewOps[4] = Index.getOperand(0);
+      DAG.UpdateNodeOperands(N, NewOps);
+      // The original sign extend has fewer users; add it back to the worklist
+      // in case it can now be removed.
+      DCI.AddToWorklist(Index.getNode());
+      DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
   }
// Gather and Scatter instructions use k-registers for masks. The type of
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=318468&r1=318467&r2=318468&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Thu Nov 16 15:09:06 2017
@@ -19,52 +19,32 @@
define <16 x float> @test1(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test1:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: kxnorw %k0, %k0, %k2
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test1:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: kxnorw %k0, %k0, %k2
-; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test1:
; SKX: # BB#0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: kxnorw %k0, %k0, %k2
-; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test1:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: kxnorw %k0, %k0, %k2
-; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
@@ -96,52 +76,32 @@ declare <8 x i32> @llvm.masked.gather.v8
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test2:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %esi, %k1
-; KNL_64-NEXT: kshiftrw $8, %k1, %k2
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test2:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; KNL_32-NEXT: kshiftrw $8, %k1, %k2
-; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test2:
; SKX: # BB#0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kmovw %esi, %k1
-; SKX-NEXT: kshiftrw $8, %k1, %k2
-; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test2:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; SKX_32-NEXT: kshiftrw $8, %k1, %k2
-; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
@@ -157,52 +117,32 @@ define <16 x float> @test2(float* %base,
define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test3:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %esi, %k1
-; KNL_64-NEXT: kshiftrw $8, %k1, %k2
-; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k2}
-; KNL_64-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test3:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; KNL_32-NEXT: kshiftrw $8, %k1, %k2
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k2}
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test3:
; SKX: # BB#0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kmovw %esi, %k1
-; SKX-NEXT: kshiftrw $8, %k1, %k2
-; SKX-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k2}
-; SKX-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm0 {%k1}
-; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test3:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; SKX_32-NEXT: kshiftrw $8, %k1, %k2
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k2}
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm0 {%k1}
-; SKX_32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
@@ -731,52 +671,32 @@ define <16 x float> @test11(float* %base
define <16 x float> @test12(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test12:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: kxnorw %k0, %k0, %k2
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test12:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: kxnorw %k0, %k0, %k2
-; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test12:
; SKX: # BB#0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: kxnorw %k0, %k0, %k2
-; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test12:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: kxnorw %k0, %k0, %k2
-; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
@@ -790,52 +710,32 @@ define <16 x float> @test12(float* %base
define <16 x float> @test13(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test13:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: kxnorw %k0, %k0, %k2
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test13:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: kxnorw %k0, %k0, %k2
-; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test13:
; SKX: # BB#0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: kxnorw %k0, %k0, %k2
-; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
-; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test13:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: kxnorw %k0, %k0, %k2
-; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
-; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
@@ -911,8 +811,9 @@ declare <2 x double> @llvm.masked.gather
define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
-; KNL_64-NEXT: vpmovsxdq %xmm0, %ymm2
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
@@ -922,9 +823,10 @@ define <4 x float> @test15(float* %base,
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %xmm0, %ymm2
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
@@ -936,9 +838,8 @@ define <4 x float> @test15(float* %base,
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
-; SKX-NEXT: vpmovsxdq %xmm0, %ymm1
-; SKX-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
-; SKX-NEXT: vzeroupper
+; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
+; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test15:
@@ -946,9 +847,8 @@ define <4 x float> @test15(float* %base,
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %xmm0, %ymm1
-; SKX_32-NEXT: vgatherqps (%eax,%ymm1,4), %xmm0 {%k1}
-; SKX_32-NEXT: vzeroupper
+; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
+; SKX_32-NEXT: vmovaps %xmm1, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <4 x i32> %ind to <4 x i64>
@@ -962,11 +862,12 @@ define <4 x double> @test16(double* %bas
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vmovdqa %ymm1, %ymm1
-; KNL_64-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -976,12 +877,13 @@ define <4 x double> @test16(double* %bas
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vmovdqa %ymm1, %ymm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
@@ -992,8 +894,7 @@ define <4 x double> @test16(double* %bas
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
-; SKX-NEXT: vpmovsxdq %xmm0, %ymm0
-; SKX-NEXT: vgatherqpd (%rdi,%ymm0,8), %ymm2 {%k1}
+; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1}
; SKX-NEXT: vmovapd %ymm2, %ymm0
; SKX-NEXT: retq
;
@@ -1002,8 +903,7 @@ define <4 x double> @test16(double* %bas
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %xmm0, %ymm0
-; SKX_32-NEXT: vgatherqpd (%eax,%ymm0,8), %ymm2 {%k1}
+; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1}
; SKX_32-NEXT: vmovapd %ymm2, %ymm0
; SKX_32-NEXT: retl
@@ -1017,9 +917,8 @@ define <2 x double> @test17(double* %bas
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1030,10 +929,9 @@ define <2 x double> @test17(double* %bas
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
@@ -1045,8 +943,6 @@ define <2 x double> @test17(double* %bas
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovapd %xmm2, %xmm0
; SKX-NEXT: retq
@@ -1056,8 +952,6 @@ define <2 x double> @test17(double* %bas
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovapd %xmm2, %xmm0
; SKX_32-NEXT: retl
@@ -1268,10 +1162,10 @@ define <2 x float> @test22(float* %base,
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_64-NEXT: vmovaps %xmm1, %xmm1
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
@@ -1282,11 +1176,11 @@ define <2 x float> @test22(float* %base,
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_32-NEXT: vmovaps %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
@@ -1296,22 +1190,20 @@ define <2 x float> @test22(float* %base,
;
; SKX-LABEL: test22:
; SKX: # BB#0:
+; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
-; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
+; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test22:
; SKX_32: # BB#0:
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
+; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %xmm2, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
@@ -1376,9 +1268,8 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1389,10 +1280,9 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
@@ -1404,8 +1294,6 @@ define <2 x i32> @test23(i32* %base, <2
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1416,8 +1304,6 @@ define <2 x i32> @test23(i32* %base, <2
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1431,32 +1317,28 @@ define <2 x i32> @test23(i32* %base, <2
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
-; KNL_64-NEXT: vpgatherqq (%rdi,%zmm1,8), %zmm0 {%k1}
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
-; KNL_32-NEXT: vmovdqa {{.*#+}} xmm0 = [1,0,1,0]
-; KNL_32-NEXT: vpsllq $63, %zmm0, %zmm0
-; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL_32-NEXT: vpgatherqq (%eax,%zmm1,8), %zmm0 {%k1}
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL_32-NEXT: vmovdqa {{.*#+}} xmm1 = [1,0,1,0]
+; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test24:
; SKX: # BB#0:
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1465,8 +1347,6 @@ define <2 x i32> @test24(i32* %base, <2
; SKX_32-LABEL: test24:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1481,9 +1361,8 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1494,10 +1373,9 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
@@ -1509,8 +1387,6 @@ define <2 x i64> @test25(i64* %base, <2
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: retq
@@ -1520,8 +1396,6 @@ define <2 x i64> @test25(i64* %base, <2
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl
@@ -1535,8 +1409,7 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
@@ -1547,9 +1420,8 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
@@ -1560,8 +1432,6 @@ define <2 x i64> @test26(i64* %base, <2
;
; SKX-LABEL: test26:
; SKX: # BB#0:
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
; SKX-NEXT: vmovdqa %xmm1, %xmm0
@@ -1570,8 +1440,6 @@ define <2 x i64> @test26(i64* %base, <2
; SKX_32-LABEL: test26:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
@@ -1586,8 +1454,8 @@ define <2 x i64> @test26(i64* %base, <2
define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test27:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
@@ -1597,9 +1465,9 @@ define <2 x float> @test27(float* %base,
;
; KNL_32-LABEL: test27:
; KNL_32: # BB#0:
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
@@ -1609,19 +1477,19 @@ define <2 x float> @test27(float* %base,
;
; SKX-LABEL: test27:
; SKX: # BB#0:
-; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX-NEXT: vpsraq $32, %xmm0, %xmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
+; SKX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SKX-NEXT: movb $3, %al
+; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test27:
; SKX_32: # BB#0:
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
-; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm1
-; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vgatherqps (%eax,%xmm1,4), %xmm0 {%k1}
+; SKX_32-NEXT: movb $3, %cl
+; SKX_32-NEXT: kmovw %ecx, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1}
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
@@ -1685,56 +1553,36 @@ define void @test28(<2 x i32>%a1, <2 x i
define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test29:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
-; KNL_64-NEXT: kxorw %k0, %k0, %k1
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
-; KNL_64-NEXT: movb $44, %al
+; KNL_64-NEXT: movw $44, %ax
; KNL_64-NEXT: kmovw %eax, %k1
-; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test29:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
-; KNL_32-NEXT: kxorw %k0, %k0, %k1
-; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
-; KNL_32-NEXT: movb $44, %cl
+; KNL_32-NEXT: movw $44, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
-; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test29:
; SKX: # BB#0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
-; SKX-NEXT: kxorw %k0, %k0, %k1
-; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
-; SKX-NEXT: movb $44, %al
+; SKX-NEXT: movw $44, %ax
; SKX-NEXT: kmovw %eax, %k1
-; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test29:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
-; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
-; SKX_32-NEXT: kxorw %k0, %k0, %k1
-; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
-; SKX_32-NEXT: movb $44, %cl
+; SKX_32-NEXT: movw $44, %cx
; SKX_32-NEXT: kmovw %ecx, %k1
-; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0