[llvm] r332326 - [X86] Add fast isel tests for some of the avx512 truncate intrinsics to match current clang codegen.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon May 14 21:26:27 PDT 2018
Author: ctopper
Date: Mon May 14 21:26:27 2018
New Revision: 332326
URL: http://llvm.org/viewvc/llvm-project?rev=332326&view=rev
Log:
[X86] Add fast isel tests for some of the avx512 truncate intrinsics to match current clang codegen.
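For reference, these tests mirror the IR clang currently emits for the corresponding _mm512/_mm256 truncation intrinsics. A minimal C sketch of the source pattern for the first triple below (function names are illustrative, not taken from the tests; compile with e.g. clang -O2 -mavx512f):

  #include <immintrin.h>

  /* unmasked: truncate 16 dwords to 16 bytes (vpmovdb %zmm, %xmm) */
  __m128i cvt(__m512i a) {
    return _mm512_cvtepi32_epi8(a);
  }

  /* merge masking: unselected result bytes keep the value from src ({%k1}) */
  __m128i cvt_mask(__m128i src, __mmask16 m, __m512i a) {
    return _mm512_mask_cvtepi32_epi8(src, m, a);
  }

  /* zero masking: unselected result bytes are zeroed ({%k1} {z}) */
  __m128i cvt_maskz(__mmask16 m, __m512i a) {
    return _mm512_maskz_cvtepi32_epi8(m, a);
  }

The files below cover the same unmasked/mask/maskz triple for the dword->byte, qword->dword, and qword->word truncations at 512 bits, and for the word->byte, dword->word, and qword->dword truncations at 256 bits.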
Modified:
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=332326&r1=332325&r2=332326&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Mon May 14 21:26:27 2018
@@ -2169,5 +2169,190 @@ entry:
ret <8 x double> %1
}
+define <2 x i64> @test_mm512_cvtepi32_epi8(<8 x i64> %__A) {
+; X32-LABEL: test_mm512_cvtepi32_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovdb %zmm0, %xmm0
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_cvtepi32_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovdb %zmm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <16 x i32>
+ %conv.i = trunc <16 x i32> %0 to <16 x i8>
+ %1 = bitcast <16 x i8> %conv.i to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_mask_cvtepi32_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_cvtepi32_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <16 x i32>
+ %1 = bitcast <2 x i64> %__O to <16 x i8>
+ %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M)
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_maskz_cvtepi32_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_cvtepi32_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <16 x i32>
+ %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M)
+ %2 = bitcast <16 x i8> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm512_cvtepi64_epi32(<8 x i64> %__A) {
+; X32-LABEL: test_mm512_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovqd %zmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovqd %zmm0, %ymm0
+; X64-NEXT: retq
+entry:
+ %conv.i = trunc <8 x i64> %__A to <8 x i32>
+ %0 = bitcast <8 x i32> %conv.i to <4 x i64>
+ ret <4 x i64> %0
+}
+
+define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_mask_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %conv.i.i = trunc <8 x i64> %__A to <8 x i32>
+ %0 = bitcast <4 x i64> %__O to <8 x i32>
+ %1 = bitcast i8 %__M to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> %0
+ %3 = bitcast <8 x i32> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_maskz_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %conv.i.i = trunc <8 x i64> %__A to <8 x i32>
+ %0 = bitcast i8 %__M to <8 x i1>
+ %1 = select <8 x i1> %0, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define <2 x i64> @test_mm512_cvtepi64_epi16(<8 x i64> %__A) {
+; X32-LABEL: test_mm512_cvtepi64_epi16:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovqw %zmm0, %xmm0
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_cvtepi64_epi16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovqw %zmm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i = trunc <8 x i64> %__A to <8 x i16>
+ %0 = bitcast <8 x i16> %conv.i to <2 x i64>
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_mask_cvtepi64_epi16:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_cvtepi64_epi16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__O to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M)
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_maskz_cvtepi64_epi16:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_cvtepi64_epi16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
+
!0 = !{i32 1}
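Note the two shapes of masked lowering captured above: for some intrinsics clang currently emits a generic trunc plus a select on the bitcast mask (e.g. test_mm512_mask_cvtepi64_epi32), while for others it still emits a call to the @llvm.x86.avx512.mask.pmov.* intrinsics declared at the end of the file (e.g. test_mm512_mask_cvtepi32_epi8). Both forms select the same masked vpmov* instructions, with {%k1} for merge masking and {%k1} {z} for zero masking.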
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=332326&r1=332325&r2=332326&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Mon May 14 21:26:27 2018
@@ -801,5 +801,65 @@ entry:
ret i32 %4
}
+define <4 x i64> @test_mm512_cvtepi16_epi8(<8 x i64> %__A) {
+; X32-LABEL: test_mm512_cvtepi16_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovwb %zmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_cvtepi16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovwb %zmm0, %ymm0
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <32 x i16>
+ %conv.i = trunc <32 x i16> %0 to <32 x i8>
+ %1 = bitcast <32 x i8> %conv.i to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm512_mask_cvtepi16_epi8(<4 x i64> %__O, i32 %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_mask_cvtepi16_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vpmovwb %zmm1, %ymm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_cvtepi16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vpmovwb %zmm1, %ymm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <32 x i16>
+ %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
+ %1 = bitcast <4 x i64> %__O to <32 x i8>
+ %2 = bitcast i32 %__M to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %conv.i.i, <32 x i8> %1
+ %4 = bitcast <32 x i8> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm512_maskz_cvtepi16_epi8(i32 %__M, <8 x i64> %__A) {
+; X32-LABEL: test_mm512_maskz_cvtepi16_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_cvtepi16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <32 x i16>
+ %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
+ %1 = bitcast i32 %__M to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i8> %conv.i.i, <32 x i8> zeroinitializer
+ %3 = bitcast <32 x i8> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
!0 = !{i32 1}
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll?rev=332326&r1=332325&r2=332326&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll Mon May 14 21:26:27 2018
@@ -811,5 +811,79 @@ define <4 x i64> @test_mm256_maskz_broad
ret <4 x i64> %res2
}
+define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) {
+; X32-LABEL: test_mm256_cvtepi16_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovwb %ymm0, %xmm0
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_cvtepi16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovwb %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <16 x i16>
+ %conv.i = trunc <16 x i16> %0 to <16 x i8>
+ %1 = bitcast <16 x i8> %conv.i to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_mask_cvtepi16_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vpmovwb %ymm1, %xmm0 {%k1}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_cvtepi16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: kmovd %eax, %k1
+; X64-NEXT: vpmovwb %ymm1, %xmm0 {%k1}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv1.i = and i16 %__M, 255
+ %0 = bitcast <4 x i64> %__A to <16 x i16>
+ %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
+ %1 = bitcast <2 x i64> %__O to <16 x i8>
+ %2 = bitcast i16 %conv1.i to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1
+ %4 = bitcast <16 x i8> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_maskz_cvtepi16_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_cvtepi16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: kmovd %eax, %k1
+; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv1.i = and i16 %__M, 255
+ %0 = bitcast <4 x i64> %__A to <16 x i16>
+ %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
+ %1 = bitcast i16 %conv1.i to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
!0 = !{i32 1}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=332326&r1=332325&r2=332326&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Mon May 14 21:26:27 2018
@@ -3404,6 +3404,137 @@ entry:
ret <2 x i64> %tmp4
}
+define <2 x i64> @test_mm256_cvtepi32_epi16(<4 x i64> %__A) local_unnamed_addr #0 {
+; X32-LABEL: test_mm256_cvtepi32_epi16:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovdw %ymm0, %xmm0
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_cvtepi32_epi16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovdw %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <8 x i32>
+ %conv.i = trunc <8 x i32> %0 to <8 x i16>
+ %1 = bitcast <8 x i16> %conv.i to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm256_mask_cvtepi32_epi16(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_mask_cvtepi32_epi16:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovdw %ymm1, %xmm0 {%k1}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_cvtepi32_epi16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovdw %ymm1, %xmm0 {%k1}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <8 x i32>
+ %1 = bitcast <2 x i64> %__O to <8 x i16>
+ %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> %1, i8 %__M)
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm256_maskz_cvtepi32_epi16(i8 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_maskz_cvtepi32_epi16:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_cvtepi32_epi16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <8 x i32>
+ %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 %__M)
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm256_cvtepi64_epi32(<4 x i64> %__A) local_unnamed_addr #0 {
+; X32-LABEL: test_mm256_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovqd %ymm0, %xmm0
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovqd %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i = trunc <4 x i64> %__A to <4 x i32>
+ %0 = bitcast <4 x i32> %conv.i to <2 x i64>
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @test_mm256_mask_cvtepi64_epi32(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_mask_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i.i = trunc <4 x i64> %__A to <4 x i32>
+ %0 = bitcast <2 x i64> %__O to <4 x i32>
+ %1 = bitcast i8 %__M to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %0
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm256_maskz_cvtepi64_epi32(i8 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_maskz_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i.i = trunc <4 x i64> %__A to <4 x i32>
+ %0 = bitcast i8 %__M to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %1 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
@@ -3426,5 +3557,6 @@ declare <4 x i32> @llvm.x86.avx512.mask.
declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
!0 = !{i32 1}