[llvm] [X86][FP16] Fix masking problem of VF[,C]MADDCSH intrinsics (PR #118071)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 29 01:00:21 PST 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/118071
Fixes: #98306
>From 94b1c80823fdacd6ac3552d8606f95af0d4721f3 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Fri, 29 Nov 2024 16:54:45 +0800
Subject: [PATCH] [X86][FP16] Fix masking problem of VF[,C]MADDCSH intrinsics
Fixes: #98306
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +++
llvm/lib/Target/X86/X86InstrAVX512.td | 6 +++---
llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll | 12 ++++++++++++
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96b03feaa45803..98ae86533628cf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26265,6 +26265,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
if (!NewOp)
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
+ if (IntrData->Opc0 == X86ISD::VFMADDCSH ||
+ IntrData->Opc0 == X86ISD::VFCMADDCSH)
+ return getScalarMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
}
case IFMA_OP:
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 299a2a74d86fc3..83a2e981ffd7a8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -13533,17 +13533,17 @@ let Uses = [MXCSR] in {
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
bit IsCommutable> {
let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
- defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
+ defm r : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, VR128X:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3",
(v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
Sched<[WriteFMAX]>;
- defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
+ defm m : AVX512_maskable_3src_scalar<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, ssmem:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3",
(v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
- defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
+ defm rb : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
"$rc, $src3, $src2", "$src2, $src3, $rc",
(v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
index 43e085f37ff67b..e449c7192e4bfc 100644
--- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
@@ -277,3 +277,15 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
ret <4 x float> %res
}
+
+define <4 x float> @PR98306() {
+; CHECK-LABEL: PR98306:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
+; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4)
+ ret <4 x float> %res
+}
More information about the llvm-commits
mailing list