[llvm] Add new test cases for scalar gather (PR #137416)
Rohit Aggarwal via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 01:59:31 PDT 2025
https://github.com/rohitaggarwal007 updated https://github.com/llvm/llvm-project/pull/137416
From e3123222e9ba5faddbed51370c2b726fc99964c5 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Sat, 26 Apr 2025 04:53:29 +0530
Subject: [PATCH 1/2] Add the test cases
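These tests extend the existing %struct.pt gather coverage with an 8-byte
struct, %struct.pt2 = { float, float }, in <8 x float> and <16 x float>
variants. In each test the index vector is ANDed with 536870911 (0x1FFFFFFF)
before being zero-extended to i64, so index*8 cannot overflow 32 bits and the
backend is free to fold the scale-by-8 (and the +4 byte offset of the second
field) into the gather addressing mode, as the vgatherqps/vgatherdps CHECK
lines below verify. A minimal sketch of the pattern under test (%idx, %base,
%m, and %passthru are illustrative names, not taken from the patch):

  %and  = and <8 x i32> %idx, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
  %ext  = zext <8 x i32> %and to <8 x i64>
  %ptrs = getelementptr inbounds %struct.pt2, ptr %base, <8 x i64> %ext, i32 1
  %res  = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)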
---
.../test/CodeGen/X86/masked_gather_scatter.ll | 513 +++++++++++++++++-
1 file changed, 512 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 46e589b7b1be9..33182b8faafd1 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4812,6 +4812,7 @@ declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x
; Test gathers from struct
%struct.pt = type { float, float, float, i32 }
+%struct.pt2 = type { float, float }
define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -5107,7 +5108,517 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
%res1 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs1, i32 4, <16 x i1> %mask, <16 x float> %src0)
%ptrs = getelementptr inbounds %struct.pt, ptr %x, <16 x i64> %zext, i32 1
%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
- %pair1 = insertvalue {<16 x float>, <16 x float>} undef, <16 x float> %res1, 0
+ %pair1 = insertvalue {<16 x float>, <16 x float>} poison, <16 x float> %res1, 0
+ %pair2 = insertvalue {<16 x float>, <16 x float>} %pair1, <16 x float> %res, 1
+ ret {<16 x float>, <16 x float>} %pair2
+}
+
+define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; X64-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; X64-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; X64-KNL-NEXT: vpbroadcastd {{.*#+}} ymm0 = [536870911,536870911,536870911,536870911,536870911,536870911,536870911,536870911]
+; X64-KNL-NEXT: vpand (%rsi), %ymm0, %ymm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vgatherqps (%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-KNL-NEXT: vmovaps %ymm1, %ymm0
+; X64-KNL-NEXT: retq
+;
+; X86-KNL-LABEL: test_gather_structpt_8f32_mask_index:
+; X86-KNL: # %bb.0:
+; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; X86-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; X86-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT: vmovdqu (%ecx), %ymm0
+; X86-KNL-NEXT: vpslld $3, %ymm0, %ymm0
+; X86-KNL-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
+; X86-KNL-NEXT: vmovaps %ymm1, %ymm0
+; X86-KNL-NEXT: retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-SKX-SMALL: # %bb.0:
+; X64-SKX-SMALL-NEXT: vpmovsxwd %xmm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpslld $31, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovd2m %ymm0, %k1
+; X64-SKX-SMALL-NEXT: vmovdqu (%rsi), %ymm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm0
+; X64-SKX-SMALL-NEXT: retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-SKX-LARGE: # %bb.0:
+; X64-SKX-LARGE-NEXT: vpmovsxwd %xmm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpslld $31, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovd2m %ymm0, %k1
+; X64-SKX-LARGE-NEXT: vmovdqu (%rsi), %ymm0
+; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to8}, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm0
+; X64-SKX-LARGE-NEXT: retq
+;
+; X86-SKX-LABEL: test_gather_structpt_8f32_mask_index:
+; X86-SKX: # %bb.0:
+; X86-SKX-NEXT: vpmovsxwd %xmm0, %ymm0
+; X86-SKX-NEXT: vpslld $31, %ymm0, %ymm0
+; X86-SKX-NEXT: vpmovd2m %ymm0, %k1
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT: vmovups (%ecx), %ymm0
+; X86-SKX-NEXT: vgatherdps (%eax,%ymm0,8), %ymm1 {%k1}
+; X86-SKX-NEXT: vmovaps %ymm1, %ymm0
+; X86-SKX-NEXT: retl
+ %wide.load = load <8 x i32>, ptr %arr, align 4
+ %and = and <8 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+ %zext = zext <8 x i32> %and to <8 x i64>
+ %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %zext
+ %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; X64-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; X64-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; X64-KNL-NEXT: vpbroadcastd {{.*#+}} ymm0 = [536870911,536870911,536870911,536870911,536870911,536870911,536870911,536870911]
+; X64-KNL-NEXT: vpand (%rsi), %ymm0, %ymm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-KNL-NEXT: vmovaps %ymm1, %ymm0
+; X64-KNL-NEXT: retq
+;
+; X86-KNL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X86-KNL: # %bb.0:
+; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; X86-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; X86-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT: vmovdqu (%ecx), %ymm0
+; X86-KNL-NEXT: vpslld $3, %ymm0, %ymm0
+; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
+; X86-KNL-NEXT: vmovaps %ymm1, %ymm0
+; X86-KNL-NEXT: retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-SKX-SMALL: # %bb.0:
+; X64-SKX-SMALL-NEXT: vpmovsxwd %xmm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpslld $31, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovd2m %ymm0, %k1
+; X64-SKX-SMALL-NEXT: vmovdqu (%rsi), %ymm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm0
+; X64-SKX-SMALL-NEXT: retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-SKX-LARGE: # %bb.0:
+; X64-SKX-LARGE-NEXT: vpmovsxwd %xmm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpslld $31, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovd2m %ymm0, %k1
+; X64-SKX-LARGE-NEXT: vmovdqu (%rsi), %ymm0
+; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to8}, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm0
+; X64-SKX-LARGE-NEXT: retq
+;
+; X86-SKX-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X86-SKX: # %bb.0:
+; X86-SKX-NEXT: vpmovsxwd %xmm0, %ymm0
+; X86-SKX-NEXT: vpslld $31, %ymm0, %ymm0
+; X86-SKX-NEXT: vpmovd2m %ymm0, %k1
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT: vmovups (%ecx), %ymm0
+; X86-SKX-NEXT: vgatherdps 4(%eax,%ymm0,8), %ymm1 {%k1}
+; X86-SKX-NEXT: vmovaps %ymm1, %ymm0
+; X86-SKX-NEXT: retl
+ %wide.load = load <8 x i32>, ptr %arr, align 4
+ %and = and <8 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+ %zext = zext <8 x i32> %and to <8 x i64>
+ %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %zext, i32 1
+ %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
+ ret <8 x float> %res
+}
+
+define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_8f32_mask_index_pair:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; X64-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; X64-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; X64-KNL-NEXT: vpbroadcastd {{.*#+}} ymm0 = [536870911,536870911,536870911,536870911,536870911,536870911,536870911,536870911]
+; X64-KNL-NEXT: vpand (%rsi), %ymm0, %ymm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: kmovw %k1, %k2
+; X64-KNL-NEXT: vmovaps %ymm1, %ymm0
+; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm0 {%k2}
+; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT: retq
+;
+; X86-KNL-LABEL: test_gather_8f32_mask_index_pair:
+; X86-KNL: # %bb.0:
+; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; X86-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; X86-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT: vmovdqu (%ecx), %ymm0
+; X86-KNL-NEXT: vpslld $3, %ymm0, %ymm2
+; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT: kmovw %k1, %k2
+; X86-KNL-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
+; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
+; X86-KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
+; X86-KNL-NEXT: retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_8f32_mask_index_pair:
+; X64-SKX-SMALL: # %bb.0:
+; X64-SKX-SMALL-NEXT: vpmovsxwd %xmm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpslld $31, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovd2m %ymm0, %k1
+; X64-SKX-SMALL-NEXT: vmovdqu (%rsi), %ymm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
+; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm0
+; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm0 {%k2}
+; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT: retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_8f32_mask_index_pair:
+; X64-SKX-LARGE: # %bb.0:
+; X64-SKX-LARGE-NEXT: vpmovsxwd %xmm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpslld $31, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovd2m %ymm0, %k1
+; X64-SKX-LARGE-NEXT: vmovdqu (%rsi), %ymm0
+; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to8}, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
+; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm0
+; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm0 {%k2}
+; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT: retq
+;
+; X86-SKX-LABEL: test_gather_8f32_mask_index_pair:
+; X86-SKX: # %bb.0:
+; X86-SKX-NEXT: vpmovsxwd %xmm0, %ymm0
+; X86-SKX-NEXT: vpslld $31, %ymm0, %ymm0
+; X86-SKX-NEXT: vpmovd2m %ymm0, %k1
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT: vmovups (%ecx), %ymm2
+; X86-SKX-NEXT: kmovw %k1, %k2
+; X86-SKX-NEXT: vmovaps %ymm1, %ymm0
+; X86-SKX-NEXT: vgatherdps (%eax,%ymm2,8), %ymm0 {%k2}
+; X86-SKX-NEXT: vgatherdps 4(%eax,%ymm2,8), %ymm1 {%k1}
+; X86-SKX-NEXT: retl
+ %wide.load = load <8 x i32>, ptr %arr, align 4
+ %and = and <8 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+ %zext = zext <8 x i32> %and to <8 x i64>
+ %ptrs1 = getelementptr inbounds %struct.pt2, ptr %x , <8 x i64> %zext
+ %res1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs1, i32 4, <8 x i1> %mask, <8 x float> %src0)
+ %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %zext, i32 1
+ %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
+ %pair1 = insertvalue {<8 x float>, <8 x float>} poison, <8 x float> %res1, 0
+ %pair2 = insertvalue {<8 x float>, <8 x float>} %pair1, <8 x float> %res, 1
+ ret {<8 x float>, <8 x float>} %pair2
+}
+
+define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
+; X64-KNL-NEXT: vgatherqps (%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT: retq
+;
+; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X86-KNL: # %bb.0:
+; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT: vmovups (%ecx), %zmm0
+; X86-KNL-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
+; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT: retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-SKX-SMALL: # %bb.0:
+; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
+; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
+; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT: retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-SKX-LARGE: # %bb.0:
+; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
+; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
+; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT: retq
+;
+; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index1:
+; X86-SKX: # %bb.0:
+; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0
+; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT: vmovups (%ecx), %zmm0
+; X86-SKX-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
+; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
+; X86-SKX-NEXT: retl
+ %wide.load = load <16 x i32>, ptr %arr, align 4
+ %and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+ %zext = zext <16 x i32> %and to <16 x i64>
+ %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <16 x i64> %zext
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
+; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT: retq
+;
+; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X86-KNL: # %bb.0:
+; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT: vmovups (%ecx), %zmm0
+; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
+; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT: retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-SKX-SMALL: # %bb.0:
+; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
+; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
+; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT: retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-SKX-LARGE: # %bb.0:
+; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
+; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
+; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT: retq
+;
+; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X86-SKX: # %bb.0:
+; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0
+; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT: vmovups (%ecx), %zmm0
+; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
+; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
+; X86-SKX-NEXT: retl
+ %wide.load = load <16 x i32>, ptr %arr, align 4
+ %and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+ %zext = zext <16 x i32> %and to <16 x i64>
+ %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <16 x i64> %zext, i32 1
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+ ret <16 x float> %res
+}
+
+define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_16f32_mask_index_pair2:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
+; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
+; X64-KNL-NEXT: vmovaps %ymm4, %ymm0
+; X64-KNL-NEXT: kmovw %k2, %k3
+; X64-KNL-NEXT: vgatherqps (%rdi,%zmm3,8), %ymm0 {%k3}
+; X64-KNL-NEXT: vmovaps %ymm1, %ymm5
+; X64-KNL-NEXT: kmovw %k1, %k3
+; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm5 {%k3}
+; X64-KNL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
+; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm3,8), %ymm4 {%k2}
+; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-KNL-NEXT: retq
+;
+; X86-KNL-LABEL: test_gather_16f32_mask_index_pair2:
+; X86-KNL: # %bb.0:
+; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT: vmovups (%ecx), %zmm2
+; X86-KNL-NEXT: kmovw %k1, %k2
+; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
+; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
+; X86-KNL-NEXT: retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_16f32_mask_index_pair2:
+; X64-SKX-SMALL: # %bb.0:
+; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
+; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
+; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
+; X64-SKX-SMALL-NEXT: vmovaps %ymm4, %ymm0
+; X64-SKX-SMALL-NEXT: kmovw %k2, %k3
+; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm3,8), %ymm0 {%k3}
+; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm5
+; X64-SKX-SMALL-NEXT: kmovw %k1, %k3
+; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm5 {%k3}
+; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
+; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm3,8), %ymm4 {%k2}
+; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-SKX-SMALL-NEXT: retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_16f32_mask_index_pair2:
+; X64-SKX-LARGE: # %bb.0:
+; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
+; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm4
+; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
+; X64-SKX-LARGE-NEXT: kmovw %k2, %k3
+; X64-SKX-LARGE-NEXT: vmovaps %ymm4, %ymm0
+; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm3,8), %ymm0 {%k3}
+; X64-SKX-LARGE-NEXT: kmovw %k1, %k3
+; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm5
+; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2,8), %ymm5 {%k3}
+; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
+; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm3,8), %ymm4 {%k2}
+; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-SKX-LARGE-NEXT: retq
+;
+; X86-SKX-LABEL: test_gather_16f32_mask_index_pair2:
+; X86-SKX: # %bb.0:
+; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0
+; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT: vmovups (%ecx), %zmm2
+; X86-SKX-NEXT: kmovw %k1, %k2
+; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
+; X86-SKX-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
+; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
+; X86-SKX-NEXT: retl
+ %wide.load = load <16 x i32>, ptr %arr, align 4
+ %and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+ %zext = zext <16 x i32> %and to <16 x i64>
+ %ptrs1 = getelementptr inbounds %struct.pt2, ptr %x, <16 x i64> %zext
+ %res1 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs1, i32 4, <16 x i1> %mask, <16 x float> %src0)
+ %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <16 x i64> %zext, i32 1
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+ %pair1 = insertvalue {<16 x float>, <16 x float>} poison, <16 x float> %res1, 0
%pair2 = insertvalue {<16 x float>, <16 x float>} %pair1, <16 x float> %res, 1
ret {<16 x float>, <16 x float>} %pair2
}
From 6897ee2a2b928c642d4bc287cad5dd3fb20102a5 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 29 Apr 2025 14:28:56 +0530
Subject: [PATCH 2/2] Fix the test case names
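Rename the new tests so that the %struct.pt2-based variants use a consistent
structpt2_ naming scheme instead of numeric suffixes. Assuming the CHECK
lines in this file are autogenerated, the labels can be refreshed after the
rename with update_llc_test_checks.py; an illustrative invocation from an
llvm-project checkout (a built llc must be reachable, e.g. via --llc-binary):

  llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/masked_gather_scatter.ll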
---
.../test/CodeGen/X86/masked_gather_scatter.ll | 84 +++++++++----------
1 file changed, 42 insertions(+), 42 deletions(-)
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 33182b8faafd1..8b8ae7ff9b088 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4994,8 +4994,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
ret <16 x float> %res
}
-define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
-; X64-KNL-LABEL: test_gather_16f32_mask_index_pair:
+define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index_pair:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5021,7 +5021,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_16f32_mask_index_pair:
+; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_pair:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5035,7 +5035,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_16f32_mask_index_pair:
+; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index_pair:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5061,7 +5061,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_16f32_mask_index_pair:
+; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_pair:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5088,7 +5088,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_16f32_mask_index_pair:
+; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_pair:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5113,8 +5113,8 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
ret {<16 x float>, <16 x float>} %pair2
}
-define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
-; X64-KNL-LABEL: test_gather_structpt_8f32_mask_index:
+define <8 x float> @test_gather_structpt2_8f32_mask_index(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt2_8f32_mask_index:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; X64-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
@@ -5126,7 +5126,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x
; X64-KNL-NEXT: vmovaps %ymm1, %ymm0
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_structpt_8f32_mask_index:
+; X86-KNL-LABEL: test_gather_structpt2_8f32_mask_index:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
@@ -5140,7 +5140,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x
; X86-KNL-NEXT: vmovaps %ymm1, %ymm0
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-SKX-SMALL-LABEL: test_gather_structpt2_8f32_mask_index:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-SKX-SMALL-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5152,7 +5152,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x
; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm0
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-SKX-LARGE-LABEL: test_gather_structpt2_8f32_mask_index:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-SKX-LARGE-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5165,7 +5165,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x
; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm0
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_structpt_8f32_mask_index:
+; X86-SKX-LABEL: test_gather_structpt2_8f32_mask_index:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxwd %xmm0, %ymm0
; X86-SKX-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5184,8 +5184,8 @@ define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x
ret <8 x float> %res
}
-define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
-; X64-KNL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+define <8 x float> @test_gather_structpt2_8f32_mask_index_offset(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt2_8f32_mask_index_offset:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; X64-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
@@ -5197,7 +5197,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr
; X64-KNL-NEXT: vmovaps %ymm1, %ymm0
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X86-KNL-LABEL: test_gather_structpt2_8f32_mask_index_offset:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
@@ -5211,7 +5211,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr
; X86-KNL-NEXT: vmovaps %ymm1, %ymm0
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-SKX-SMALL-LABEL: test_gather_structpt2_8f32_mask_index_offset:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-SKX-SMALL-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5223,7 +5223,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr
; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm0
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-SKX-LARGE-LABEL: test_gather_structpt2_8f32_mask_index_offset:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-SKX-LARGE-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5236,7 +5236,7 @@ define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr
; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm0
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X86-SKX-LABEL: test_gather_structpt2_8f32_mask_index_offset:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxwd %xmm0, %ymm0
; X86-SKX-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5255,8 +5255,8 @@ define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr
ret <8 x float> %res
}
-define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
-; X64-KNL-LABEL: test_gather_8f32_mask_index_pair:
+define {<8 x float>, <8 x float>} @test_gather_structpt2_8f32_mask_index_pair(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt2_8f32_mask_index_pair:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; X64-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
@@ -5270,7 +5270,7 @@ define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_8f32_mask_index_pair:
+; X86-KNL-LABEL: test_gather_structpt2_8f32_mask_index_pair:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-KNL-NEXT: vpmovsxwq %xmm0, %zmm0
@@ -5288,7 +5288,7 @@ define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr
; X86-KNL-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_8f32_mask_index_pair:
+; X64-SKX-SMALL-LABEL: test_gather_structpt2_8f32_mask_index_pair:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-SKX-SMALL-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5302,7 +5302,7 @@ define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_8f32_mask_index_pair:
+; X64-SKX-LARGE-LABEL: test_gather_structpt2_8f32_mask_index_pair:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-SKX-LARGE-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5317,7 +5317,7 @@ define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_8f32_mask_index_pair:
+; X86-SKX-LABEL: test_gather_structpt2_8f32_mask_index_pair:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxwd %xmm0, %ymm0
; X86-SKX-NEXT: vpslld $31, %ymm0, %ymm0
@@ -5342,8 +5342,8 @@ define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr
ret {<8 x float>, <8 x float>} %pair2
}
-define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
-; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index1:
+define <16 x float> @test_gather_structpt2_16f32_mask_index(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt2_16f32_mask_index:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5360,7 +5360,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <1
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X86-KNL-LABEL: test_gather_structpt2_16f32_mask_index:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5372,7 +5372,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <1
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-SKX-SMALL-LABEL: test_gather_structpt2_16f32_mask_index:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5389,7 +5389,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <1
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-SKX-LARGE-LABEL: test_gather_structpt2_16f32_mask_index:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5407,7 +5407,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <1
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index1:
+; X86-SKX-LABEL: test_gather_structpt2_16f32_mask_index:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5426,8 +5426,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <1
ret <16 x float> %res
}
-define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
-; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+define <16 x float> @test_gather_structpt2_16f32_mask_index_offset(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt2_16f32_mask_index_offset:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5444,7 +5444,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X86-KNL-LABEL: test_gather_structpt2_16f32_mask_index_offset:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5456,7 +5456,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-SKX-SMALL-LABEL: test_gather_structpt2_16f32_mask_index_offset:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5473,7 +5473,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-SKX-LARGE-LABEL: test_gather_structpt2_16f32_mask_index_offset:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5491,7 +5491,7 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X86-SKX-LABEL: test_gather_structpt2_16f32_mask_index_offset:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5510,8 +5510,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %
ret <16 x float> %res
}
-define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
-; X64-KNL-LABEL: test_gather_16f32_mask_index_pair2:
+define {<16 x float>, <16 x float>} @test_gather_structpt2_16f32_mask_index_pair(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt2_16f32_mask_index_pair:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5535,7 +5535,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x,
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
; X64-KNL-NEXT: retq
;
-; X86-KNL-LABEL: test_gather_16f32_mask_index_pair2:
+; X86-KNL-LABEL: test_gather_structpt2_16f32_mask_index_pair:
; X86-KNL: # %bb.0:
; X86-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5549,7 +5549,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x,
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
; X86-KNL-NEXT: retl
;
-; X64-SKX-SMALL-LABEL: test_gather_16f32_mask_index_pair2:
+; X64-SKX-SMALL-LABEL: test_gather_structpt2_16f32_mask_index_pair:
; X64-SKX-SMALL: # %bb.0:
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5573,7 +5573,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x,
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
; X64-SKX-SMALL-NEXT: retq
;
-; X64-SKX-LARGE-LABEL: test_gather_16f32_mask_index_pair2:
+; X64-SKX-LARGE-LABEL: test_gather_structpt2_16f32_mask_index_pair:
; X64-SKX-LARGE: # %bb.0:
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
@@ -5598,7 +5598,7 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x,
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
; X64-SKX-LARGE-NEXT: retq
;
-; X86-SKX-LABEL: test_gather_16f32_mask_index_pair2:
+; X86-SKX-LABEL: test_gather_structpt2_16f32_mask_index_pair:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
; X86-SKX-NEXT: vpslld $31, %zmm0, %zmm0