[PATCH] D75506: [X86] Fix bug: Scalar FMA intrinsics generate wrong result
LiuChen via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 01:43:22 PST 2020
LiuChen3 created this revision.
LiuChen3 added reviewers: pengfei, craig.topper, LuoYuanke, RKSimon.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.
For example, _mm_maskz_fmadd_sd would generate the following assembly:
vmovapd 48(%rsp), %xmm1
vmovapd 32(%rsp), %xmm2
vmovapd 16(%rsp), %xmm0
kmovw %eax, %k1
vfmadd231sd %xmm2, %xmm1, %xmm0 {%k1} {z} # xmm0 = (xmm1 * xmm2) + xmm0
In some cases it will be optimized as follows:
vmovapd 48(%rsp), %xmm0
vmovapd 32(%rsp), %xmm1
vmovapd 16(%rsp), %xmm2
kmovw %eax, %k1
vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # xmm0 = (xmm1 * xmm0) + xmm2
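The upper 64 bits of the result are then wrong: a scalar FMA passes the upper bits of its destination register through unchanged, and after commuting, %xmm0 holds the operand loaded from 48(%rsp) instead of the one from 16(%rsp).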
https://reviews.llvm.org/D75506
Files:
llvm/lib/Target/X86/X86InstrAVX512.td
Index: llvm/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/lib/Target/X86/X86InstrAVX512.td
+++ llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6721,19 +6721,19 @@
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
- "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
+ "$src3, $src2", "$src2, $src3", (null_frag)>,
AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
- "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
+ "$src3, $src2", "$src2, $src3", (null_frag)>,
AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
- OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag)>,
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
let isCodeGenOnly = 1, isCommutable = 1 in {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D75506.247819.patch
Type: text/x-patch
Size: 1433 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200303/43554a4b/attachment.bin>
More information about the llvm-commits
mailing list