[llvm] r289350 - [X86] Remove masking from 512-bit VPERMIL intrinsics in preparation for being able to constant fold them in InstCombineCalls like we do for 128/256-bit.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 10 17:26:45 PST 2016
Author: ctopper
Date: Sat Dec 10 19:26:44 2016
New Revision: 289350
URL: http://llvm.org/viewvc/llvm-project?rev=289350&view=rev
Log:
[X86] Remove masking from 512-bit VPERMIL intrinsics in preparation for being able to constant fold them in InstCombineCalls like we do for 128/256-bit.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=289350&r1=289349&r2=289350&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sat Dec 10 19:26:44 2016
@@ -1316,16 +1316,14 @@ let TargetPrefix = "x86" in { // All in
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vpermilvar_pd_512 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ def int_x86_avx512_vpermilvar_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd512">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vpermilvar_ps_512 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ def int_x86_avx512_vpermilvar_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps512">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
def int_x86_avx512_pshuf_b_512 :
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=289350&r1=289349&r2=289350&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Sat Dec 10 19:26:44 2016
@@ -312,10 +312,7 @@ static bool UpgradeIntrinsicFunction1(Fu
Name == "avx512.mask.sub.pd.256" || // Added in 4.0
Name == "avx512.mask.sub.ps.128" || // Added in 4.0
Name == "avx512.mask.sub.ps.256" || // Added in 4.0
- Name == "avx512.mask.vpermilvar.ps.128" || // Added in 4.0
- Name == "avx512.mask.vpermilvar.ps.256" || // Added in 4.0
- Name == "avx512.mask.vpermilvar.pd.128" || // Added in 4.0
- Name == "avx512.mask.vpermilvar.pd.256" || // Added in 4.0
+ Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.psll.w") || // Added in 4.0
@@ -1673,6 +1670,10 @@ void llvm::UpgradeIntrinsicCall(CallInst
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
else if (Name.endswith("pd.256"))
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
+ else if (Name.endswith("ps.512"))
+ IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
+ else if (Name.endswith("pd.512"))
+ IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
else
llvm_unreachable("Unexpected vpermilvar intrinsic");
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=289350&r1=289349&r2=289350&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat Dec 10 19:26:44 2016
@@ -1255,10 +1255,6 @@ static const IntrinsicData IntrinsicsWi
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
@@ -1552,6 +1548,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vpermilvar_pd_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_vpermilvar_ps_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=289350&r1=289349&r2=289350&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Sat Dec 10 19:26:44 2016
@@ -2596,3 +2596,62 @@ define <16 x i32> @test_maskz_valign_d(<
declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
+declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+; Test case to make sure we can print shuffle decode comments for constant pool loads.
+define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=289350&r1=289349&r2=289350&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat Dec 10 19:26:44 2016
@@ -3853,63 +3853,111 @@ define <4 x float>@test_int_x86_avx512_m
ret <4 x float> %res13
}
-declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
-define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
- %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
- %res3 = fadd <8 x double> %res, %res1
- %res4 = fadd <8 x double> %res2, %res3
- ret <8 x double> %res4
+ %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2
+ ret <8 x double> %res2
}
-declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_maskz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+ ret <16 x float> %res
+}
-define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
- %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res2, %res3
- ret <16 x float> %res4
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
+ ret <16 x float> %res2
}
-; Test case to make sure we can print shuffle decode comments for constant pool loads.
-define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
-; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
+ ret <16 x float> %res2
+}
+
+; Test case to make sure we can print shuffle decode comments for constant pool loads.
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
-; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
-; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
- %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res2, %res3
- ret <16 x float> %res4
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
+ ret <16 x float> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
+ ret <16 x float> %res2
}
declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)
More information about the llvm-commits
mailing list