[PATCH] D75506: [X86] Fix bug: Scalar FMA intrinsics generate wrong result

LiuChen via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 01:43:22 PST 2020


LiuChen3 created this revision.
LiuChen3 added reviewers: pengfei, craig.topper, LuoYuanke, RKSimon.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.

For example, _mm_maskz_fmadd_sd would generate the following assembly:

vmovapd	48(%rsp), %xmm1
	vmovapd	32(%rsp), %xmm2
	vmovapd	16(%rsp), %xmm0
	kmovw	%eax, %k1
	vfmadd231sd	%xmm2, %xmm1, %xmm0 {%k1} {z} # xmm0 = (xmm1 * xmm2) + xmm0

In some cases it will be optimized as follows:

vmovapd	48(%rsp), %xmm0
	vmovapd	32(%rsp), %xmm1
	vmovapd	16(%rsp), %xmm2
	kmovw	%eax, %k1
	vfmadd213sd	%xmm2, %xmm1, %xmm0 {%k1} {z} # xmm0 = (xmm1 * xmm0) + xmm2

The upper 64 bits of the result are not correct.


https://reviews.llvm.org/D75506

Files:
  llvm/lib/Target/X86/X86InstrAVX512.td


Index: llvm/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/lib/Target/X86/X86InstrAVX512.td
+++ llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6721,19 +6721,19 @@
 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
-          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
+          "$src3, $src2", "$src2, $src3", (null_frag)>,
           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
 
   let mayLoad = 1 in
   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
-          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
+          "$src3, $src2", "$src2, $src3", (null_frag)>,
           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
 
   let Uses = [MXCSR] in
   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
-         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
+         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag)>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
 
   let isCodeGenOnly = 1, isCommutable = 1 in {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D75506.247819.patch
Type: text/x-patch
Size: 1433 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200303/43554a4b/attachment.bin>


More information about the llvm-commits mailing list