[llvm] r285173 - [AVX-512] Add scalar vfmsub/vfnmsub mask3 intrinsics
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 21:59:59 PDT 2016
Author: ctopper
Date: Tue Oct 25 23:59:58 2016
New Revision: 285173
URL: http://llvm.org/viewvc/llvm-project?rev=285173&view=rev
Log:
[AVX-512] Add scalar vfmsub/vfnmsub mask3 intrinsics
Summary: Clang's intrinsic header currently tries to negate the third operand of a vfmadd mask3 in order to create vfmsub, but this fails isel. This patch adds scalar vfmsub and vfnmsub mask3 that we can use instead to avoid the negate. This is consistent with the packed instructions.
Reviewers: igorb, delena
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D25933
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=285173&r1=285172&r2=285173&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Oct 25 23:59:58 2016
@@ -3074,6 +3074,18 @@ let TargetPrefix = "x86" in { // All in
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask3_vfmsub_sd :
+ GCCBuiltin<"__builtin_ia32_vfmsubsd3_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmsub_ss :
+ GCCBuiltin<"__builtin_ia32_vfmsubss3_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask3_vfmsub_pd_128 :
GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">,
Intrinsic<[llvm_v2f64_ty],
@@ -3182,6 +3194,18 @@ let TargetPrefix = "x86" in { // All in
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask3_vfnmsub_sd :
+ GCCBuiltin<"__builtin_ia32_vfnmsubsd3_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_ss :
+ GCCBuiltin<"__builtin_ia32_vfnmsubss3_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_vfnmsub_pd_128 :
GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=285173&r1=285172&r2=285173&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Oct 25 23:59:58 2016
@@ -1521,6 +1521,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
X86ISD::FMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUB_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUB_RND, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
@@ -1539,6 +1541,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUB_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUB_RND, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=285173&r1=285172&r2=285173&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Oct 25 23:59:58 2016
@@ -5694,6 +5694,118 @@ define <4 x float>@test_int_x86_avx512_m
ret <4 x float> %res6
}
+declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
+; CHECK-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm4
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
+; CHECK-NEXT: vfmsub213sd {rz-sae}, %xmm2, %xmm0, %xmm1
+; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res4 = fadd <2 x double> %res, %res1
+ %res5 = fadd <2 x double> %res2, %res3
+ %res6 = fadd <2 x double> %res4, %res5
+ ret <2 x double> %res6
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
+; CHECK-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm4
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
+; CHECK-NEXT: vfmsub213ss {rz-sae}, %xmm2, %xmm0, %xmm1
+; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res4 = fadd <4 x float> %res, %res1
+ %res5 = fadd <4 x float> %res2, %res3
+ %res6 = fadd <4 x float> %res4, %res5
+ ret <4 x float> %res6
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
+; CHECK-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm4
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
+; CHECK-NEXT: vfnmsub213sd {rz-sae}, %xmm2, %xmm0, %xmm1
+; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
+ %res4 = fadd <2 x double> %res, %res1
+ %res5 = fadd <2 x double> %res2, %res3
+ %res6 = fadd <2 x double> %res4, %res5
+ ret <2 x double> %res6
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
+; CHECK-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm4
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
+; CHECK-NEXT: vfnmsub213ss {rz-sae}, %xmm2, %xmm0, %xmm1
+; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res4 = fadd <4 x float> %res, %res1
+ %res5 = fadd <4 x float> %res2, %res3
+ %res6 = fadd <4 x float> %res4, %res5
+ ret <4 x float> %res6
+}
+
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; CHECK: ## BB#0:
More information about the llvm-commits
mailing list