[llvm] 35a9631 - [X86][SelectionDAG] Handle the case for gather where index is SHL (#139703)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 19 09:12:52 PDT 2025


Author: Rohit Aggarwal
Date: 2025-05-19T17:12:48+01:00
New Revision: 35a9631279268b6d3d0f0826da0c09e78db6529a

URL: https://github.com/llvm/llvm-project/commit/35a9631279268b6d3d0f0826da0c09e78db6529a
DIFF: https://github.com/llvm/llvm-project/commit/35a9631279268b6d3d0f0826da0c09e78db6529a.diff

LOG: [X86][SelectionDAG] Handle the case for gather where index is SHL (#139703)

Fix the Gather's Index for SHL Opcode in which shift amount is 4 or greater.

It is in the continuity of #137813

---------

Co-authored-by: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Co-authored-by: Simon Pilgrim <llvm-dev at redking.me.uk>

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/masked_gather_scatter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b6b0d7b86c9c..df305637b1e4b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56725,7 +56725,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
         return SDValue(N, 0);
       }
       if (auto MinShAmt = DAG.getValidMinimumShiftAmount(Index)) {
-        if (*MinShAmt >= 1 && (*MinShAmt + Log2ScaleAmt) < 4 &&
+        if (*MinShAmt >= 1 && Log2ScaleAmt < 3 &&
             DAG.ComputeNumSignBits(Index.getOperand(0)) > 1) {
           SDValue ShAmt = Index.getOperand(1);
           SDValue NewShAmt =

diff  --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index a5c727e8df9d6..af018d83d520e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4808,16 +4808,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X64-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; X64-KNL-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-KNL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-KNL-NEXT:    vpsllq $4, %zmm0, %zmm0
-; X64-KNL-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-KNL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
-; X64-KNL-NEXT:    kshiftrw $8, %k1, %k2
-; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
-; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
-; X64-KNL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; X64-KNL-NEXT:    vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-KNL-NEXT:    vmovaps %zmm1, %zmm0
 ; X64-KNL-NEXT:    retq
 ;
 ; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4839,16 +4832,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X64-SKX-SMALL-NEXT:    vpmovd2m %zmm0, %k1
 ; X64-SKX-SMALL-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-SMALL-NEXT:    vpsllq $4, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-SKX-SMALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-SMALL-NEXT:    kshiftrw $8, %k1, %k2
-; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-SMALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-SMALL-NEXT:    vmovaps %zmm1, %zmm0
 ; X64-SKX-SMALL-NEXT:    retq
 ;
 ; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4859,16 +4845,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X64-SKX-LARGE-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
 ; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-LARGE-NEXT:    vpsllq $4, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-SKX-LARGE-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-LARGE-NEXT:    kshiftrw $8, %k1, %k2
-; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-LARGE-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT:    vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-LARGE-NEXT:    vmovaps %zmm1, %zmm0
 ; X64-SKX-LARGE-NEXT:    retq
 ;
 ; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4898,16 +4877,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X64-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; X64-KNL-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-KNL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-KNL-NEXT:    vpsllq $4, %zmm0, %zmm0
-; X64-KNL-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-KNL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
-; X64-KNL-NEXT:    kshiftrw $8, %k1, %k2
-; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
-; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
-; X64-KNL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; X64-KNL-NEXT:    vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-KNL-NEXT:    vmovaps %zmm1, %zmm0
 ; X64-KNL-NEXT:    retq
 ;
 ; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4929,16 +4901,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X64-SKX-SMALL-NEXT:    vpmovd2m %zmm0, %k1
 ; X64-SKX-SMALL-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-SMALL-NEXT:    vpsllq $4, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-SKX-SMALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-SMALL-NEXT:    kshiftrw $8, %k1, %k2
-; X64-SKX-SMALL-NEXT:    vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-SMALL-NEXT:    vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-SMALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-SMALL-NEXT:    vmovaps %zmm1, %zmm0
 ; X64-SKX-SMALL-NEXT:    retq
 ;
 ; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4949,16 +4914,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X64-SKX-LARGE-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
 ; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-LARGE-NEXT:    vpsllq $4, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-SKX-LARGE-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-LARGE-NEXT:    kshiftrw $8, %k1, %k2
-; X64-SKX-LARGE-NEXT:    vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-LARGE-NEXT:    vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-LARGE-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT:    vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-LARGE-NEXT:    vmovaps %zmm1, %zmm0
 ; X64-SKX-LARGE-NEXT:    retq
 ;
 ; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4988,23 +4946,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
 ; X64-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; X64-KNL-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-KNL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-KNL-NEXT:    vpsllq $4, %zmm0, %zmm3
-; X64-KNL-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-KNL-NEXT:    vextractf64x4 $1, %zmm1, %ymm4
-; X64-KNL-NEXT:    kshiftrw $8, %k1, %k2
-; X64-KNL-NEXT:    kmovw %k2, %k3
-; X64-KNL-NEXT:    vmovaps %ymm4, %ymm0
-; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
-; X64-KNL-NEXT:    vmovaps %ymm1, %ymm5
-; X64-KNL-NEXT:    kmovw %k1, %k3
-; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
-; X64-KNL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
-; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
-; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
-; X64-KNL-NEXT:    vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-KNL-NEXT:    vpaddd %zmm0, %zmm0, %zmm2
+; X64-KNL-NEXT:    kmovw %k1, %k2
+; X64-KNL-NEXT:    vmovaps %zmm1, %zmm0
+; X64-KNL-NEXT:    vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
+; X64-KNL-NEXT:    vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
 ; X64-KNL-NEXT:    retq
 ;
 ; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5028,23 +4974,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
 ; X64-SKX-SMALL-NEXT:    vpmovd2m %zmm0, %k1
 ; X64-SKX-SMALL-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-SMALL-NEXT:    vpsllq $4, %zmm0, %zmm3
-; X64-SKX-SMALL-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-SKX-SMALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm4
-; X64-SKX-SMALL-NEXT:    kshiftrw $8, %k1, %k2
-; X64-SKX-SMALL-NEXT:    kmovw %k2, %k3
-; X64-SKX-SMALL-NEXT:    vmovaps %ymm4, %ymm0
-; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
-; X64-SKX-SMALL-NEXT:    vmovaps %ymm1, %ymm5
-; X64-SKX-SMALL-NEXT:    kmovw %k1, %k3
-; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
-; X64-SKX-SMALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
-; X64-SKX-SMALL-NEXT:    vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
-; X64-SKX-SMALL-NEXT:    vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
-; X64-SKX-SMALL-NEXT:    vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-SKX-SMALL-NEXT:    vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-SMALL-NEXT:    kmovw %k1, %k2
+; X64-SKX-SMALL-NEXT:    vmovaps %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT:    vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
+; X64-SKX-SMALL-NEXT:    vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
 ; X64-SKX-SMALL-NEXT:    retq
 ;
 ; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5055,23 +4989,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
 ; X64-SKX-LARGE-NEXT:    vmovdqu64 (%rsi), %zmm0
 ; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
 ; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-LARGE-NEXT:    vpsllq $4, %zmm0, %zmm3
-; X64-SKX-LARGE-NEXT:    vpsllq $4, %zmm2, %zmm2
-; X64-SKX-LARGE-NEXT:    vextractf64x4 $1, %zmm1, %ymm4
-; X64-SKX-LARGE-NEXT:    kshiftrw $8, %k1, %k2
-; X64-SKX-LARGE-NEXT:    vmovaps %ymm4, %ymm0
-; X64-SKX-LARGE-NEXT:    kmovw %k2, %k3
-; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
-; X64-SKX-LARGE-NEXT:    vmovaps %ymm1, %ymm5
-; X64-SKX-LARGE-NEXT:    kmovw %k1, %k3
-; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
-; X64-SKX-LARGE-NEXT:    vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
-; X64-SKX-LARGE-NEXT:    vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
-; X64-SKX-LARGE-NEXT:    vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
-; X64-SKX-LARGE-NEXT:    vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-SKX-LARGE-NEXT:    vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-LARGE-NEXT:    kmovw %k1, %k2
+; X64-SKX-LARGE-NEXT:    vmovaps %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT:    vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
+; X64-SKX-LARGE-NEXT:    vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
 ; X64-SKX-LARGE-NEXT:    retq
 ;
 ; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_pair:


        


More information about the llvm-commits mailing list