[llvm] 8eb4d25 - [X86] Split X87/SSE compare classes into WriteFCom + WriteFComX

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 17 06:17:58 PST 2020


Author: Simon Pilgrim
Date: 2020-01-17T13:53:58Z
New Revision: 8eb4d25a0943bce6e8a4859825dce4f67a4f6384

URL: https://github.com/llvm/llvm-project/commit/8eb4d25a0943bce6e8a4859825dce4f67a4f6384
DIFF: https://github.com/llvm/llvm-project/commit/8eb4d25a0943bce6e8a4859825dce4f67a4f6384.diff

LOG: [X86] Split X87/SSE compare classes into WriteFCom + WriteFComX

Most X87 compare instructions write to the X87 status word, while the SSE (U)COMI compares write to rFLAGS. These are often handled very differently on CPUs (e.g. rFLAGS outputs typically involve a fpu2gpr transfer), and we shouldn't be grouping all these instructions behind a single class, so this patch splits off the SSE compares into a new WriteFComX class (and currently keeps the same behaviours). If there's a need to distinguish between X87 instructions more closely, we can investigate that in the future, but as we don't handle any of the X87 side effects at the moment, it's unlikely to have any notable effect.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/lib/Target/X86/X86InstrSSE.td
    llvm/lib/Target/X86/X86SchedBroadwell.td
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/lib/Target/X86/X86SchedSandyBridge.td
    llvm/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/lib/Target/X86/X86Schedule.td
    llvm/lib/Target/X86/X86ScheduleAtom.td
    llvm/lib/Target/X86/X86ScheduleBdVer2.td
    llvm/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/lib/Target/X86/X86ScheduleSLM.td
    llvm/lib/Target/X86/X86ScheduleZnver1.td
    llvm/lib/Target/X86/X86ScheduleZnver2.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 32f012033fb0..1280fc13af4c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8552,7 +8552,7 @@ let Predicates = [HasVLX] in {
 //  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                             string OpcodeStr, Domain d,
-                            X86FoldableSchedWrite sched = WriteFCom> {
+                            X86FoldableSchedWrite sched = WriteFComX> {
   let hasSideEffects = 0, Uses = [MXCSR] in
   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,

diff  --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c45f342ed75b..1775d7570daf 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1830,7 +1830,7 @@ let Constraints = "$src1 = $dst" in {
 multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                          ValueType vt, X86MemOperand x86memop,
                          PatFrag ld_frag, string OpcodeStr, Domain d,
-                         X86FoldableSchedWrite sched = WriteFCom> {
+                         X86FoldableSchedWrite sched = WriteFComX> {
 let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
     ExeDomain = d in {
   def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
@@ -1851,7 +1851,7 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
                              ValueType vt, Operand memop,
                              ComplexPattern mem_cpat, string OpcodeStr,
                              Domain d,
-                             X86FoldableSchedWrite sched = WriteFCom> {
+                             X86FoldableSchedWrite sched = WriteFComX> {
 let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in {
   def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),

diff  --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 9b1fcaa8a13d..488687288359 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -260,7 +260,8 @@ defm : BWWriteResPair<WriteFCmp64X, [BWPort1],  3, [1], 1, 5>; // Floating point
 defm : BWWriteResPair<WriteFCmp64Y, [BWPort1],  3, [1], 1, 6>; // Floating point double compare (YMM/ZMM).
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 
-defm : BWWriteResPair<WriteFCom,    [BWPort1],  3>; // Floating point compare to flags.
+defm : BWWriteResPair<WriteFCom,    [BWPort1],  3>; // Floating point compare to flags (X87).
+defm : BWWriteResPair<WriteFComX,   [BWPort1],  3>; // Floating point compare to flags (SSE).
 
 defm : BWWriteResPair<WriteFMul,    [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
 defm : BWWriteResPair<WriteFMulX,   [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM).

diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 06f417501b21..bfe551c6d212 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -261,6 +261,7 @@ defm : HWWriteResPair<WriteFCmp64Y, [HWPort1],  3, [1], 1, 7>;
 defm : HWWriteResPair<WriteFCmp64Z, [HWPort1],  3, [1], 1, 7>; // Unsupported = 1
 
 defm : HWWriteResPair<WriteFCom,    [HWPort1],  3>;
+defm : HWWriteResPair<WriteFComX,   [HWPort1],  3>;
 
 defm : HWWriteResPair<WriteFMul,    [HWPort01],  5, [1], 1, 5>;
 defm : HWWriteResPair<WriteFMulX,   [HWPort01],  5, [1], 1, 6>;

diff  --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 26d4d8fa3549..fb1a35b48db1 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -238,6 +238,7 @@ defm : SBWriteResPair<WriteFCmp64Y, [SBPort1],  3, [1], 1, 7>;
 defm : SBWriteResPair<WriteFCmp64Z, [SBPort1],  3, [1], 1, 7>; // Unsupported = 1
 
 defm : SBWriteResPair<WriteFCom,    [SBPort1],  3>;
+defm : SBWriteResPair<WriteFComX,   [SBPort1],  3>;
 
 defm : SBWriteResPair<WriteFMul,    [SBPort0],  5, [1], 1, 6>;
 defm : SBWriteResPair<WriteFMulX,   [SBPort0],  5, [1], 1, 6>;

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 9a511ecc0071..4dc3b199b601 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -255,7 +255,8 @@ defm : SKLWriteResPair<WriteFCmp64X,  [SKLPort01],  4, [1], 1, 6>;
 defm : SKLWriteResPair<WriteFCmp64Y,  [SKLPort01],  4, [1], 1, 7>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 
-defm : SKLWriteResPair<WriteFCom,      [SKLPort0],  2>; // Floating point compare to flags.
+defm : SKLWriteResPair<WriteFCom,      [SKLPort0],  2>; // Floating point compare to flags (X87).
+defm : SKLWriteResPair<WriteFComX,     [SKLPort0],  2>; // Floating point compare to flags (SSE).
 
 defm : SKLWriteResPair<WriteFMul,     [SKLPort01],  4, [1], 1, 5>; // Floating point multiplication.
 defm : SKLWriteResPair<WriteFMulX,    [SKLPort01],  4, [1], 1, 6>;

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index a8c65435ab9b..acaf4c2bdcac 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -255,7 +255,8 @@ defm : SKXWriteResPair<WriteFCmp64X,   [SKXPort01],  4, [1], 1, 6>;
 defm : SKXWriteResPair<WriteFCmp64Y,   [SKXPort01],  4, [1], 1, 7>;
 defm : SKXWriteResPair<WriteFCmp64Z,   [SKXPort05],  4, [1], 1, 7>;
 
-defm : SKXWriteResPair<WriteFCom,       [SKXPort0],  2>; // Floating point compare to flags.
+defm : SKXWriteResPair<WriteFCom,       [SKXPort0],  2>; // Floating point compare to flags (X87).
+defm : SKXWriteResPair<WriteFComX,      [SKXPort0],  2>; // Floating point compare to flags (SSE).
 
 defm : SKXWriteResPair<WriteFMul,      [SKXPort01],  4, [1], 1, 5>; // Floating point multiplication.
 defm : SKXWriteResPair<WriteFMulX,     [SKXPort01],  4, [1], 1, 6>;

diff  --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 95f710061aeb..b09486bf069c 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -250,7 +250,8 @@ defm WriteFCmp64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double
 defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
 defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
 defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
-defm WriteFCom    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare to flags.
+defm WriteFCom    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare to flags (X87).
+defm WriteFComX   : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare to flags (SSE).
 defm WriteFMul    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point multiplication.
 defm WriteFMulX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
 defm WriteFMulY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).

diff  --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index b0153ca9da36..d63cca018c6a 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -244,6 +244,7 @@ defm : AtomWriteResPair<WriteFCmp64X,       [AtomPort01], [AtomPort01],  6,  7,
 defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 defm : AtomWriteResPair<WriteFCom,           [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
+defm : AtomWriteResPair<WriteFComX,          [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
 defm : AtomWriteResPair<WriteFMul,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 defm : AtomWriteResPair<WriteFMulX,          [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
 defm : X86WriteResPairUnsupported<WriteFMulY>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index d7aea3cf4e9d..77f1959048e3 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -766,6 +766,7 @@ defm : PdWriteResYMMPair<WriteFCmp64Y,      [PdFPU0, PdFPFMA],  2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 
 defm : PdWriteResXMMPair<WriteFCom,         [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResXMMPair<WriteFComX,        [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
 
 def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
   let Latency = 6;

diff  --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index d0421d94ee05..23967f320544 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -541,6 +541,7 @@ defm : JWriteResFpuPair<WriteFCmp64X,      [JFPU0, JFPA],  2>;
 defm : JWriteResYMMPair<WriteFCmp64Y,      [JFPU0, JFPA],  2, [2,2], 2>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 defm : JWriteResFpuPair<WriteFCom,  [JFPU0, JFPA, JALU0],  3>;
+defm : JWriteResFpuPair<WriteFComX, [JFPU0, JFPA, JALU0],  3>;
 defm : JWriteResFpuPair<WriteFMul,         [JFPU1, JFPM],  2>;
 defm : JWriteResFpuPair<WriteFMulX,        [JFPU1, JFPM],  2>;
 defm : JWriteResYMMPair<WriteFMulY,        [JFPU1, JFPM],  2, [2,2], 2>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index dcd155ea0e0e..eca794c48d83 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -214,6 +214,7 @@ defm : SLMWriteResPair<WriteFCmp64X,  [SLM_FPC_RSV1], 3>;
 defm : SLMWriteResPair<WriteFCmp64Y,  [SLM_FPC_RSV1], 3>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 defm : SLMWriteResPair<WriteFCom,     [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFComX,    [SLM_FPC_RSV1], 3>;
 defm : SLMWriteResPair<WriteFMul,     [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
 defm : SLMWriteResPair<WriteFMulX,    [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
 defm : SLMWriteResPair<WriteFMulY,    [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 06201f4a3a84..089e7aafaf22 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -295,6 +295,7 @@ defm : ZnWriteResFpuPair<WriteFCmp64X,   [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WriteFCmp64Y,   [ZnFPU0],  3>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 defm : ZnWriteResFpuPair<WriteFCom,      [ZnFPU0],  3>;
+defm : ZnWriteResFpuPair<WriteFComX,     [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WriteFBlend,    [ZnFPU01], 1>;
 defm : ZnWriteResFpuPair<WriteFBlendY,   [ZnFPU01], 1>;
 defm : X86WriteResPairUnsupported<WriteFBlendZ>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 4537d9cc7956..e8443172f2e1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -281,6 +281,7 @@ defm : Zn2WriteResFpuPair<WriteFCmp64X,   [Zn2FPU0],  3>;
 defm : Zn2WriteResFpuPair<WriteFCmp64Y,   [Zn2FPU0],  3>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 defm : Zn2WriteResFpuPair<WriteFCom,      [Zn2FPU0],  3>;
+defm : Zn2WriteResFpuPair<WriteFComX,     [Zn2FPU0],  3>;
 defm : Zn2WriteResFpuPair<WriteFBlend,    [Zn2FPU01], 1>;
 defm : Zn2WriteResFpuPair<WriteFBlendY,   [Zn2FPU01], 1>;
 defm : X86WriteResPairUnsupported<WriteFBlendZ>;


        


More information about the llvm-commits mailing list