[llvm] 08cf536 - [X86] Add an additional ReadAfterLoad to EVEX FMA instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 9 17:14:04 PDT 2023
Author: Craig Topper
Date: 2023-04-09T17:12:37-07:00
New Revision: 08cf5360c2a5885f3ee879cd4b32fb9a74aae07f
URL: https://github.com/llvm/llvm-project/commit/08cf5360c2a5885f3ee879cd4b32fb9a74aae07f
DIFF: https://github.com/llvm/llvm-project/commit/08cf5360c2a5885f3ee879cd4b32fb9a74aae07f.diff
LOG: [X86] Add an additional ReadAfterLoad to EVEX FMA instructions.
These instructions have 3 sources. 2 of them are registers when
the load is folded. So we need 2 ReadAfterLoad SchedReads.
Added:
Modified:
llvm/lib/Target/X86/X86InstrAVX512.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 49fe951f22d77..5205c1d2974b5 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6818,7 +6818,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6828,7 +6829,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
(MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -6911,7 +6913,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6923,7 +6926,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
(_.VT (MaskOpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -7007,7 +7011,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
@@ -7019,7 +7024,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -7097,7 +7103,8 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
+ SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -7115,7 +7122,8 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
+ SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
let Uses = [MXCSR] in
def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -7433,7 +7441,8 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7442,7 +7451,8 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
- T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
More information about the llvm-commits mailing list