[llvm] r318466 - [X86] Don't remove sign extend of gather/scatter indices during SelectionDAGBuilder.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 15:08:58 PST 2017
Author: ctopper
Date: Thu Nov 16 15:08:57 2017
New Revision: 318466
URL: http://llvm.org/viewvc/llvm-project?rev=318466&view=rev
Log:
[X86] Don't remove sign extend of gather/scatter indices during SelectionDAGBuilder.
The sign extend might be from an i16 or i8 type and may have been inserted by InstCombine to widen the index to the pointer width. X86 gather legalization doesn't currently detect this case and reinsert a sign extend to make things legal.
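For illustration, the failing pattern looks roughly like the IR below. This is a hypothetical reduced case (the function name, vector width, and the %sext/%gep value names are mine, not taken from PR30690), written with the masked.gather mangling used in trunk around this time:

  define <4 x float> @narrow_index_gather(float* %base, <4 x i8> %ind, <4 x i1> %mask) {
    ; InstCombine widens the narrow index to pointer width ahead of the GEP.
    %sext = sext <4 x i8> %ind to <4 x i64>
    %gep = getelementptr float, float* %base, <4 x i64> %sext
    %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep, i32 4, <4 x i1> %mask, <4 x float> undef)
    ret <4 x float> %res
  }
  declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)

Before this patch, getUniformBase would look through the sext and hand the target a <4 x i8> index vector, which X86 gather legalization doesn't know how to re-widen.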
It's a bit weird for the SelectionDAGBuilder to do this kind of optimization in the first place. With this removed, we can at least lean on InstCombine somewhat to ensure the index is i32 or i64.
I'll work on recovering some of the test cases by removing sign extends in the backend when it's safe to do so, given an understanding of the current legalizer's capabilities.
This should fix PR30690.
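For context on the check-line churn below, the regressed tests have this shape (a condensed sketch based on test12 in the diff; the exact declarations in the file may differ slightly):

  define <16 x float> @test12(float* %base, <16 x i32> %ind) {
    ; The explicit sext to pointer width is now preserved into the DAG.
    %sext_ind = sext <16 x i32> %ind to <16 x i64>
    %gep.random = getelementptr float, float* %base, <16 x i64> %sext_ind
    %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    ret <16 x float> %res
  }
  declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)

Since the sext now survives into the DAG, the index arrives as <16 x i64>, so the gather is split into two 8-element vgatherqps operations instead of a single 16-element vgatherdps, as the updated CHECK lines show.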
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=318466&r1=318465&r2=318466&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Nov 16 15:08:57 2017
@@ -3902,13 +3902,6 @@ static bool getUniformBase(const Value*
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
- // Suppress sign extension.
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
- if (SDB->findValue(Sext->getOperand(0))) {
- IndexVal = Sext->getOperand(0);
- Index = SDB->getValue(IndexVal);
- }
- }
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=318466&r1=318465&r2=318466&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Thu Nov 16 15:08:57 2017
@@ -19,32 +19,52 @@
define <16 x float> @test1(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test1:
; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: kxnorw %k0, %k0, %k2
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test1:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: kxnorw %k0, %k0, %k2
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test1:
; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: kxnorw %k0, %k0, %k2
+; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test1:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: kxnorw %k0, %k0, %k2
+; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
@@ -76,32 +96,52 @@ declare <8 x i32> @llvm.masked.gather.v8
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test2:
; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %esi, %k1
-; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test2:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: kshiftrw $8, %k1, %k2
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test2:
; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kmovw %esi, %k1
-; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test2:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: kshiftrw $8, %k1, %k2
+; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
@@ -117,32 +157,52 @@ define <16 x float> @test2(float* %base,
define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test3:
; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %esi, %k1
-; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k2}
+; KNL_64-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test3:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: kshiftrw $8, %k1, %k2
+; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k2}
+; KNL_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test3:
; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kmovw %esi, %k1
-; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k2}
+; SKX-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test3:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
-; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; SKX_32-NEXT: kshiftrw $8, %k1, %k2
+; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k2}
+; SKX_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
@@ -671,32 +731,52 @@ define <16 x float> @test11(float* %base
define <16 x float> @test12(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test12:
; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: kxnorw %k0, %k0, %k2
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test12:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: kxnorw %k0, %k0, %k2
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test12:
; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: kxnorw %k0, %k0, %k2
+; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test12:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: kxnorw %k0, %k0, %k2
+; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
@@ -710,32 +790,52 @@ define <16 x float> @test12(float* %base
define <16 x float> @test13(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test13:
; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: kxnorw %k0, %k0, %k2
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test13:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: kxnorw %k0, %k0, %k2
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test13:
; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: kxnorw %k0, %k0, %k2
+; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test13:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: kxnorw %k0, %k0, %k2
+; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
@@ -811,9 +911,8 @@ declare <2 x double> @llvm.masked.gather
define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
+; KNL_64-NEXT: vpmovsxdq %xmm0, %ymm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
@@ -823,10 +922,9 @@ define <4 x float> @test15(float* %base,
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
+; KNL_32-NEXT: vpmovsxdq %xmm0, %ymm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
@@ -838,8 +936,9 @@ define <4 x float> @test15(float* %base,
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
-; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
-; SKX-NEXT: vmovaps %xmm1, %xmm0
+; SKX-NEXT: vpmovsxdq %xmm0, %ymm1
+; SKX-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test15:
@@ -847,8 +946,9 @@ define <4 x float> @test15(float* %base,
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
-; SKX_32-NEXT: vmovaps %xmm1, %xmm0
+; SKX_32-NEXT: vpmovsxdq %xmm0, %ymm1
+; SKX_32-NEXT: vgatherqps (%eax,%ymm1,4), %xmm0 {%k1}
+; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
%sext_ind = sext <4 x i32> %ind to <4 x i64>
@@ -862,12 +962,11 @@ define <4 x double> @test16(double* %bas
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vmovdqa %ymm1, %ymm1
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_64-NEXT: vpmovsxdq %xmm0, %ymm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -877,13 +976,12 @@ define <4 x double> @test16(double* %bas
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vmovdqa %ymm1, %ymm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_32-NEXT: vpmovsxdq %xmm0, %ymm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
@@ -894,7 +992,8 @@ define <4 x double> @test16(double* %bas
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
-; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1}
+; SKX-NEXT: vpmovsxdq %xmm0, %ymm0
+; SKX-NEXT: vgatherqpd (%rdi,%ymm0,8), %ymm2 {%k1}
; SKX-NEXT: vmovapd %ymm2, %ymm0
; SKX-NEXT: retq
;
@@ -903,7 +1002,8 @@ define <4 x double> @test16(double* %bas
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1}
+; SKX_32-NEXT: vpmovsxdq %xmm0, %ymm0
+; SKX_32-NEXT: vgatherqpd (%eax,%ymm0,8), %ymm2 {%k1}
; SKX_32-NEXT: vmovapd %ymm2, %ymm0
; SKX_32-NEXT: retl
@@ -917,8 +1017,9 @@ define <2 x double> @test17(double* %bas
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -929,9 +1030,10 @@ define <2 x double> @test17(double* %bas
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
@@ -943,6 +1045,8 @@ define <2 x double> @test17(double* %bas
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovapd %xmm2, %xmm0
; SKX-NEXT: retq
@@ -952,6 +1056,8 @@ define <2 x double> @test17(double* %bas
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovapd %xmm2, %xmm0
; SKX_32-NEXT: retl
@@ -1162,10 +1268,10 @@ define <2 x float> @test22(float* %base,
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
-; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_64-NEXT: vmovaps %xmm1, %xmm1
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
@@ -1176,11 +1282,11 @@ define <2 x float> @test22(float* %base,
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
-; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_32-NEXT: vmovaps %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
@@ -1190,20 +1296,22 @@ define <2 x float> @test22(float* %base,
;
; SKX-LABEL: test22:
; SKX: # BB#0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
-; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
+; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test22:
; SKX_32: # BB#0:
-; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1}
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %xmm2, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
@@ -1268,8 +1376,9 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1280,9 +1389,10 @@ define <2 x i32> @test23(i32* %base, <2
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
@@ -1294,6 +1404,8 @@ define <2 x i32> @test23(i32* %base, <2
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1304,6 +1416,8 @@ define <2 x i32> @test23(i32* %base, <2
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1317,28 +1431,32 @@ define <2 x i32> @test23(i32* %base, <2
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
-; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm1,8), %zmm0 {%k1}
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vmovdqa {{.*#+}} xmm1 = [1,0,1,0]
-; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
-; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
-; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
+; KNL_32-NEXT: vmovdqa {{.*#+}} xmm0 = [1,0,1,0]
+; KNL_32-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm1,8), %zmm0 {%k1}
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test24:
; SKX: # BB#0:
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1347,6 +1465,8 @@ define <2 x i32> @test24(i32* %base, <2
; SKX_32-LABEL: test24:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
@@ -1361,8 +1481,9 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
@@ -1373,9 +1494,10 @@ define <2 x i64> @test25(i64* %base, <2
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
@@ -1387,6 +1509,8 @@ define <2 x i64> @test25(i64* %base, <2
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: retq
@@ -1396,6 +1520,8 @@ define <2 x i64> @test25(i64* %base, <2
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl
@@ -1409,7 +1535,8 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
@@ -1420,8 +1547,9 @@ define <2 x i64> @test26(i64* %base, <2
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
@@ -1432,6 +1560,8 @@ define <2 x i64> @test26(i64* %base, <2
;
; SKX-LABEL: test26:
; SKX: # BB#0:
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
; SKX-NEXT: vmovdqa %xmm1, %xmm0
@@ -1440,6 +1570,8 @@ define <2 x i64> @test26(i64* %base, <2
; SKX_32-LABEL: test26:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
@@ -1454,8 +1586,8 @@ define <2 x i64> @test26(i64* %base, <2
define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test27:
; KNL_64: # BB#0:
-; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
@@ -1465,9 +1597,9 @@ define <2 x float> @test27(float* %base,
;
; KNL_32-LABEL: test27:
; KNL_32: # BB#0:
-; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
@@ -1477,19 +1609,19 @@ define <2 x float> @test27(float* %base,
;
; SKX-LABEL: test27:
; SKX: # BB#0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; SKX-NEXT: movb $3, %al
-; SKX-NEXT: kmovw %eax, %k1
-; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm1
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test27:
; SKX_32: # BB#0:
-; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: movb $3, %cl
-; SKX_32-NEXT: kmovw %ecx, %k1
-; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1}
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherqps (%eax,%xmm1,4), %xmm0 {%k1}
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
@@ -1553,36 +1685,56 @@ define void @test28(<2 x i32>%a1, <2 x i
define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test29:
; KNL_64: # BB#0:
-; KNL_64-NEXT: movw $44, %ax
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_64-NEXT: kxorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
+; KNL_64-NEXT: movb $44, %al
; KNL_64-NEXT: kmovw %eax, %k1
-; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test29:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: movw $44, %cx
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_32-NEXT: kxorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
+; KNL_32-NEXT: movb $44, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
-; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test29:
; SKX: # BB#0:
-; SKX-NEXT: movw $44, %ax
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
+; SKX-NEXT: kxorw %k0, %k0, %k1
+; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
+; SKX-NEXT: movb $44, %al
; SKX-NEXT: kmovw %eax, %k1
-; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test29:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: movw $44, %cx
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; SKX_32-NEXT: kxorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
+; SKX_32-NEXT: movb $44, %cl
; SKX_32-NEXT: kmovw %ecx, %k1
-; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0