[llvm] r333795 - [X86] Expand the testing of expand and compress intrinsics
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 1 14:59:24 PDT 2018
Author: ctopper
Date: Fri Jun 1 14:59:24 2018
New Revision: 333795
URL: http://llvm.org/viewvc/llvm-project?rev=333795&view=rev
Log:
[X86] Expand the testing of expand and compress intrinsics
The avx512f intrinsic tests were previously in the avx512vl test file, and we were also missing some masking combinations.
This does show that we fail to use the zero-masking form of expand loads when the passthru is zero. I'll try to get that fixed shortly.
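As an illustration of the missed zero-masking form (a sketch based on the new test_maskz_expand_load_pd_512 test below; the "{z}" form is what the log suggests we should emit, not what the compiler currently produces):

  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
  ; currently selected: explicit zeroing followed by a merge-masking expand load
  ;   vpxor     %xmm0, %xmm0, %xmm0
  ;   vexpandpd (%rdi), %zmm0 {%k1}
  ; expected improvement: fold the zero passthru into the zero-masking form
  ;   vexpandpd (%rdi), %zmm0 {%k1} {z}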
Modified:
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=333795&r1=333794&r2=333795&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Fri Jun 1 14:59:24 2018
@@ -1,6 +1,475 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
+
+define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+
+define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcompresspd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_compress_pd_512(<8 x double> %data) {
+; CHECK-LABEL: test_compress_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+
+define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
+; CHECK-LABEL: test_compress_store_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %zmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_store_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vcompressps %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
+
+define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcompressps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_compress_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_compress_ps_512(<16 x float> %data) {
+; CHECK-LABEL: test_compress_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
+
+define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
+; CHECK-LABEL: test_compress_store_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %zmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+
+define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcompressq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
+; CHECK-LABEL: test_compress_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
+
+define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
+; CHECK-LABEL: test_compress_store_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %zmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_store_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpcompressd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
+
+define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcompressd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_compress_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
+; CHECK-LABEL: test_compress_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
+
+define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
+; CHECK-LABEL: test_compress_store_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %zmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
+ ret void
+}
+
+define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+
+define <8 x double> @test_expand_pd_512(<8 x double> %data) {
+; CHECK-LABEL: test_expand_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vexpandpd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+
+define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
+; CHECK-LABEL: test_expand_load_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
+ ret <8 x double> %res
+}
+
+define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_load_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
+
+define <16 x float> @test_expand_ps_512(<16 x float> %data) {
+; CHECK-LABEL: test_expand_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vexpandps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
+
+define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
+; CHECK-LABEL: test_expand_load_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
+ ret <16 x float> %res
+}
+
+define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+
+define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
+; CHECK-LABEL: test_expand_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpexpandq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
+
+define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
+; CHECK-LABEL: test_expand_load_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_load_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
+
+define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
+; CHECK-LABEL: test_expand_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpexpandd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
+
+define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
+; CHECK-LABEL: test_expand_load_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
+ ret <16 x i32> %res
+}
+
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll?rev=333795&r1=333794&r2=333795&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll Fri Jun 1 14:59:24 2018
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vbmi2 | FileCheck %s
-define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
-; CHECK-LABEL: test_expand_load_w_512:
+define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_mask_expand_load_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandw (%rdi), %zmm0 {%k1}
@@ -10,21 +10,62 @@ define <32 x i16> @test_expand_load_w_51
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret <32 x i16> %res
}
-declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
-define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
-; CHECK-LABEL: test_compress_store_w_512:
+define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpcompressw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
- ret void
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
}
-declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
-define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
-; CHECK-LABEL: test_expand_load_b_512:
+declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
+
+define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
+; CHECK-LABEL: test_expand_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: test_mask_expand_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandw %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_maskz_expand_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
+
+define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
+; CHECK-LABEL: test_expand_load_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_mask_expand_load_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: vpexpandb (%rdi), %zmm0 {%k1}
@@ -32,64 +73,163 @@ define <64 x i8> @test_expand_load_b_512
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
ret <64 x i8> %res
}
-declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
-define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
-; CHECK-LABEL: test_compress_store_b_512:
+define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rsi, %k1
-; CHECK-NEXT: vpcompressb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandb (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+
+define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
+; CHECK-LABEL: test_expand_b_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: test_mask_expand_b_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpexpandb %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_maskz_expand_b_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
+
+define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
+; CHECK-LABEL: test_expand_load_b_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
+ ret <64 x i8> %res
+}
+
+define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_mask_compress_store_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpcompressw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret void
}
-declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
-define <32 x i16> @test_compress_w_512(<32 x i16> %data, <32 x i16> %src, i32 %mask) {
-; CHECK-LABEL: test_compress_w_512:
+declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
+
+define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: test_mask_compress_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src, i32 %mask)
+ %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
ret <32 x i16> %res
}
-declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
-define <64 x i8> @test_compress_b_512(<64 x i8> %data, <64 x i8> %src, i64 %mask) {
-; CHECK-LABEL: test_compress_b_512:
+define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_maskz_compress_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
+; CHECK-LABEL: test_compress_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
+
+define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
+; CHECK-LABEL: test_compress_store_w_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %zmm0, (%rdi)
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_mask_compress_store_b_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovq %rsi, %k1
+; CHECK-NEXT: vpcompressb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+
+define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: test_mask_compress_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpcompressb %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src, i64 %mask)
+ %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
ret <64 x i8> %res
}
-declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
-define <32 x i16> @test_expand_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
-; CHECK-LABEL: test_expand_w_512:
+define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_maskz_compress_b_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
}
-declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
-define <64 x i8> @test_expand_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
-; CHECK-LABEL: test_expand_b_512:
+define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
+; CHECK-LABEL: test_compress_b_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq %rsi, %k1
-; CHECK-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
+ %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
ret <64 x i8> %res
}
-declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
+
+declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
+
+define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
+; CHECK-LABEL: test_compress_store_b_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %zmm0, (%rdi)
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
+ ret void
+}
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll?rev=333795&r1=333794&r2=333795&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll Fri Jun 1 14:59:24 2018
@@ -1,19 +1,157 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,+avx512vbmi2 | FileCheck %s
-define <16 x i16> @test_compress_w_256(<16 x i16> %src, <16 x i16> %data, i16 %mask) {
-; CHECK-LABEL: test_compress_w_256:
+define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpexpandw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+
+define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
+; CHECK-LABEL: test_expand_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vpexpandw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
- %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src, i16 %mask)
- ret <16 x i16> %res
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
+ ret <8 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
-define <8 x i16> @test_compress_w_128(<8 x i16> %data, i8 %mask) {
-; CHECK-LABEL: test_compress_w_128:
+define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
+
+define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
+; CHECK-LABEL: test_expand_load_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_load_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpexpandb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+
+define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
+; CHECK-LABEL: test_expand_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
+
+define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
+; CHECK-LABEL: test_expand_load_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
+ ret <16 x i8> %res
+}
+
+define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+
+define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcompressw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
@@ -21,21 +159,51 @@ define <8 x i16> @test_compress_w_128(<8
%res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
-define <32 x i8> @test_compress_b_256(<32 x i8> %src, <32 x i8> %data, i32 %mask) {
-; CHECK-LABEL: test_compress_b_256:
+define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
+; CHECK-LABEL: test_compress_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
+
+define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
+; CHECK-LABEL: test_compress_store_w_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %xmm0, (%rdi)
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_store_b_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpcompressb %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+
+define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vpcompressb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
- %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src, i32 %mask)
- ret <32 x i8> %res
+ %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
+ ret <16 x i8> %res
}
-declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
-define <16 x i8> @test_compress_b_128(<16 x i8> %data, i16 %mask) {
-; CHECK-LABEL: test_compress_b_128:
+define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_compress_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
@@ -43,78 +211,154 @@ define <16 x i8> @test_compress_b_128(<1
%res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
-declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
-define <32 x i8> @test_expand_b_256(<32 x i8> %data, <32 x i8> %src, i32 %mask) {
-; CHECK-LABEL: test_expand_b_256:
+define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
+; CHECK-LABEL: test_compress_b_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpexpandb %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256( <32 x i8> %data, <32 x i8> %src, i32 %mask)
- ret <32 x i8> %res
+ %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
+ ret <16 x i8> %res
}
-declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
-define <16 x i8> @test_expand_b_128(<16 x i8> %data, i16 %mask) {
-; CHECK-LABEL: test_expand_b_128:
+declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
+
+define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
+; CHECK-LABEL: test_compress_store_b_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
- ret <16 x i8> %res
+ call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
+ ret void
+}
+
+define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_load_w_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpexpandw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_w_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
}
-declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
-define <16 x i16> @test_expand_w_256(<16 x i16> %data, <16 x i16> %src, i16 %mask) {
+declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+
+define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_expand_w_256:
+; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256( <16 x i16> %data, <16 x i16> %src, i16 %mask)
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
ret <16 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
-define <8 x i16> @test_expand_w_128(<8 x i16> %data, i8 %mask) {
-; CHECK-LABEL: test_expand_w_128:
+define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_expand_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
- ret <8 x i16> %res
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
}
-declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
-define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
+declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
+
+define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
; CHECK-LABEL: test_expand_load_w_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpexpandw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: vmovups (%rdi), %ymm0
; CHECK-NEXT: retq
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
ret <16 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
-define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
-; CHECK-LABEL: test_expand_load_w_128:
+define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_mask_expand_load_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpexpandw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: vpexpandb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
- ret <8 x i16> %res
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
+ ret <32 x i8> %res
}
-declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
-define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
-; CHECK-LABEL: test_compress_store_w_256:
+define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpexpandb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
+
+define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
+; CHECK-LABEL: test_expand_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: test_mask_expand_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandb %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_maskz_expand_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
+
+define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
+; CHECK-LABEL: test_expand_load_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
+ ret <32 x i8> %res
+}
+
+define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_store_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %ymm0, (%rdi) {%k1}
@@ -122,43 +366,51 @@ define void @test_compress_store_w_256(i
call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret void
}
+
declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
-define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
-; CHECK-LABEL: test_compress_store_w_128:
+define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: test_mask_compress_w_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcompressw %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
- ret void
+ %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
+ ret <16 x i16> %res
}
-declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
-define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
-; CHECK-LABEL: test_expand_load_b_256:
+define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_maskz_compress_w_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpexpandb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
- ret <32 x i8> %res
+ %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
}
-declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
-define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
-; CHECK-LABEL: test_expand_load_b_128:
+define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
+; CHECK-LABEL: test_compress_w_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpexpandb (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
- ret <16 x i8> %res
+ %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
+ ret <16 x i16> %res
}
-declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
-define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
-; CHECK-LABEL: test_compress_store_b_256:
+declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
+
+define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
+; CHECK-LABEL: test_compress_store_w_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %ymm0, (%rdi)
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_mask_compress_store_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressb %ymm0, (%rdi) {%k1}
@@ -166,18 +418,48 @@ define void @test_compress_store_b_256(i
call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
ret void
}
+
declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
-define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
-; CHECK-LABEL: test_compress_store_b_128:
+define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: test_mask_compress_b_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpcompressb %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcompressb %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+ %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_maskz_compress_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
+; CHECK-LABEL: test_compress_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
+
+define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
+; CHECK-LABEL: test_compress_store_b_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %ymm0, (%rdi)
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
ret void
}
-declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=333795&r1=333794&r2=333795&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Fri Jun 1 14:59:24 2018
@@ -1,32 +1,61 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
-define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) {
-; CHECK-LABEL: compr1:
+
+define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_pd_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
+; CHECK-NEXT: vcompresspd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+ call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
ret void
}
-declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
-define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) {
-; CHECK-LABEL: compr2:
+define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_pd_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vcompresspd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompresspd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
- ret void
+ %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
+ ret <2 x double> %res
}
-declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
-define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) {
-; CHECK-LABEL: compr3:
+define <2 x double> @test_compress_pd_128(<2 x double> %data) {
+; CHECK-LABEL: test_compress_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
+
+define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
+; CHECK-LABEL: test_compress_store_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_ps_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vcompressps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
@@ -37,143 +66,215 @@ define void @compr3(i8* %addr, <4 x floa
declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
-define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) {
-; CHECK-LABEL: compr4:
+define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_ps_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompressps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
- ret <8 x double> %res
+ %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
+ ret <4 x float> %res
}
-declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
-
-define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
-; CHECK-LABEL: compr5:
+define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_ps_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; CHECK-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
- ret <4 x double> %res
+ %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
}
-declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
-
-define <4 x float> @compr6(<4 x float> %data, i8 %mask) {
-; CHECK-LABEL: compr6:
+define <4 x float> @test_compress_ps_128(<4 x float> %data) {
+; CHECK-LABEL: test_compress_ps_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
+ %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
-define void @compr7(i8* %addr, <8 x double> %data) {
-; CHECK-LABEL: compr7:
+define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
+; CHECK-LABEL: test_compress_store_ps_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
+ call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
ret void
}
-define <4 x float> @compr8(<4 x float> %data) {
-; CHECK-LABEL: compr8:
+define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_q_128:
; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpcompressq %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
- ret <4 x float> %res
+ call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+
+define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcompressq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
+ ret <2 x i64> %res
}
-define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) {
-; CHECK-LABEL: compr9:
+define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
+; CHECK-LABEL: test_compress_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
+
+define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
+; CHECK-LABEL: test_compress_store_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_d_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
+; CHECK-NEXT: vpcompressd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+ call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
ret void
}
-declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
-define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
-; CHECK-LABEL: compr10:
+define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_d_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
+; CHECK-NEXT: vpcompressd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
ret <4 x i32> %res
}
-
-@xmm = common global <4 x i32> zeroinitializer, align 16
-@k8 = common global i8 0, align 1
-
-define i32 @compr11() {
-; CHECK-LABEL: compr11:
-; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movq _xmm@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
-; CHECK-NEXT: ## fixup A - offset: 3, value: _xmm@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
-; CHECK-NEXT: vmovdqa (%rax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
-; CHECK-NEXT: movq _k8@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
-; CHECK-NEXT: ## fixup A - offset: 3, value: _k8@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
-; CHECK-NEXT: movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00]
-; CHECK-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
-; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xd8]
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xe8]
-; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
-entry:
- %.compoundliteral = alloca <2 x i64>, align 16
- %res = alloca <4 x i32>, align 16
- %a0 = load <4 x i32>, <4 x i32>* @xmm, align 16
- %a2 = load i8, i8* @k8, align 1
- %a21 = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %a0, <4 x i32> zeroinitializer, i8 %a2) #2
- store volatile <4 x i32> %a21, <4 x i32>* %res, align 16
- store <2 x i64> zeroinitializer, <2 x i64>* %.compoundliteral, align 16
- ret i32 0
+ %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
+; CHECK-LABEL: test_compress_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
-; Expand
+define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
+; CHECK-LABEL: test_compress_store_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
+ ret void
+}
-define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
-; CHECK-LABEL: expand1:
+define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_pd_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
+; CHECK-NEXT: vexpandpd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
- ret <8 x double> %res
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
+ ret <2 x double> %res
}
-declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
-
-define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
-; CHECK-LABEL: expand2:
+define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_pd_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vexpandpd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
- ret <4 x double> %res
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
}
-declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
+
+define <2 x double> @test_expand_pd_128(<2 x double> %data) {
+; CHECK-LABEL: test_expand_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vexpandpd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
+
+define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
+; CHECK-LABEL: test_expand_load_pd_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
+ ret <2 x double> %res
+}
-define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
-; CHECK-LABEL: expand3:
+define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_ps_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
@@ -182,108 +283,644 @@ define <4 x float> @expand3(i8* %addr, <
ret <4 x float> %res
}
+define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_ps_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
-define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
-; CHECK-LABEL: expand4:
+define <4 x float> @test_expand_ps_128(<4 x float> %data) {
+; CHECK-LABEL: test_expand_ps_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_ps_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vexpandps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_ps_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
+
+define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
+; CHECK-LABEL: test_expand_load_ps_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
+ ret <4 x float> %res
+}
+
+define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpexpandq (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpexpandq (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+
+define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
+; CHECK-LABEL: test_expand_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
+
+define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
+; CHECK-LABEL: test_expand_load_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpexpandd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpexpandd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
+
+define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
+; CHECK-LABEL: test_expand_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
+
+define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
+; CHECK-LABEL: test_expand_load_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
+ ret <4 x i32> %res
+}
+
+define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_pd_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
+; CHECK-NEXT: vcompresspd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
- ret <8 x double> %res
+ call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+ ret void
}
-declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
-define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
-; CHECK-LABEL: expand5:
+define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_compress_pd_256(<4 x double> %data) {
+; CHECK-LABEL: test_compress_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
+ ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
+
+define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
+; CHECK-LABEL: test_compress_store_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vcompressps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
+
+define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompressps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_compress_ps_256(<8 x float> %data) {
+; CHECK-LABEL: test_compress_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
+
+define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
+; CHECK-LABEL: test_compress_store_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpcompressq %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
+
+define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcompressq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
+; CHECK-LABEL: test_compress_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
+
+define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_compress_store_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_store_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpcompressd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
+
+define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_compress_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcompressd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_compress_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
+; CHECK-LABEL: test_compress_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
+
+define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_compress_store_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
+ ret void
+}
+
+define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+
+define <4 x double> @test_expand_pd_256(<4 x double> %data) {
+; CHECK-LABEL: test_expand_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_pd_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
-define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
-; CHECK-LABEL: expand6:
+define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
+; CHECK-LABEL: test_expand_load_pd_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
+ ret <4 x double> %res
+}
+
+define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vexpandps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vexpandps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
+
+define <8 x float> @test_expand_ps_256(<8 x float> %data) {
+; CHECK-LABEL: test_expand_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_ps_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
+; CHECK-NEXT: vexpandps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
- ret <4 x float> %res
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
+ ret <8 x float> %res
}
-declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
+define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_ps_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
-define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
-; CHECK-LABEL: expand7:
+define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
+; CHECK-LABEL: test_expand_load_ps_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
- ret <8 x double> %res
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
+ ret <8 x float> %res
}
-define <4 x float> @expand8(<4 x float> %data) {
-; CHECK-LABEL: expand8:
+define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_q_256:
; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpexpandq (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
- ret <4 x float> %res
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
+ ret <4 x i64> %res
}
-define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
-; CHECK-LABEL: expand9:
+define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpexpandq (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
- ret <8 x i64> %res
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
-define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
-; CHECK-LABEL: expand10:
+define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
+; CHECK-LABEL: test_expand_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
+; CHECK-NEXT: vpexpandq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
- ret <4 x i32> %res
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
+ ret <4 x i64> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
+define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
-define <8 x i64> @expand11(i8* %addr) {
-; CHECK-LABEL: expand11:
+define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_expand_load_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> undef, i8 -1)
- ret <8 x i64> %res
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
+ ret <4 x i64> %res
}
-define <8 x i64> @expand12(i8* %addr, i8 %mask) {
-; CHECK-LABEL: expand12:
+define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_load_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
+; CHECK-NEXT: vpexpandd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %laddr = bitcast i8* %addr to <8 x i64>*
- %data = load <8 x i64>, <8 x i64>* %laddr, align 1
- %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64>zeroinitializer, i8 %mask)
- ret <8 x i64> %res
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
+ ret <8 x i32> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> , <8 x i64>, i8)
+define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_load_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpexpandd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
+
+define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
+; CHECK-LABEL: test_expand_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: test_mask_expand_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
+; CHECK-LABEL: test_maskz_expand_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
+
+define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_expand_load_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
+ ret <8 x i32> %res
+}
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_cmpps_256: