[llvm] r331767 - [X86] Split WriteIDiv into div/idiv 8/16/32/64 implementations (PR36930)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 8 06:51:45 PDT 2018
Author: rksimon
Date: Tue May 8 06:51:45 2018
New Revision: 331767
URL: http://llvm.org/viewvc/llvm-project?rev=331767&view=rev
Log:
[X86] Split WriteIDiv into div/idiv 8/16/32/64 implementations (PR36930)
I've created the necessary classes but there are still a lot of overrides that need cleaning up.
NOTE: The Znver1 model was missing some div/idiv variants in the instregex patterns and wasn't setting the resource cycles at all in the overrides.
Modified:
llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s
Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrArithmetic.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Tue May 8 06:51:45 2018
@@ -280,70 +280,65 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
-let SchedRW = [WriteIDiv] in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>;
+ "div{b}\t$src", []>, Sched<[WriteDiv8]>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, OpSize16;
+ "div{w}\t$src", []>, Sched<[WriteDiv16]>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>, OpSize32;
+ "div{l}\t$src", []>, Sched<[WriteDiv32]>, OpSize32;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
- "div{q}\t$src", []>;
-} // SchedRW
+ "div{q}\t$src", []>, Sched<[WriteDiv64]>;
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>, SchedLoadReg<WriteIDivLd>;
+ "div{b}\t$src", []>, SchedLoadReg<WriteDiv8.Folded>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDivLd>;
+ "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16.Folded>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>, SchedLoadReg<WriteIDivLd>, OpSize32;
+ "div{l}\t$src", []>, SchedLoadReg<WriteDiv32.Folded>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>, SchedLoadReg<WriteIDivLd>,
+ "div{q}\t$src", []>, SchedLoadReg<WriteDiv64.Folded>,
Requires<[In64BitMode]>;
}
// Signed division/remainder.
-let SchedRW = [WriteIDiv] in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>;
+ "idiv{b}\t$src", []>, Sched<[WriteIDiv8]>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, OpSize16;
+ "idiv{w}\t$src", []>, Sched<[WriteIDiv16]>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>, OpSize32;
+ "idiv{l}\t$src", []>, Sched<[WriteIDiv32]>, OpSize32;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
- "idiv{q}\t$src", []>;
-} // SchedRW
+ "idiv{q}\t$src", []>, Sched<[WriteIDiv64]>;
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>,
- SchedLoadReg<WriteIDivLd>;
+ "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8.Folded>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDivLd>;
+ "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16.Folded>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDivLd>;
+ "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32.Folded>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>, SchedLoadReg<WriteIDivLd>,
+ "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64.Folded>,
Requires<[In64BitMode]>;
}
} // hasSideEffects = 0
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue May 8 06:51:45 2018
@@ -108,7 +108,16 @@ def : WriteRes<WriteRMW, [BWPort237,BWPo
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
-defm : BWWriteResPair<WriteIDiv, [BWPort0, BWDivider], 25, [1, 10]>;
+
+defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteIDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
+
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue May 8 06:51:45 2018
@@ -141,8 +141,14 @@ defm : HWWriteResPair<WritePOPCNT, [HWP
defm : HWWriteResPair<WriteBEXTR, [HWPort06,HWPort15], 2, [1,1], 2>;
defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
-// This is quite rough, latency depends on the dividend.
-defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteIDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteIDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteIDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteIDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue May 8 06:51:45 2018
@@ -103,7 +103,16 @@ def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
-defm : SBWriteResPair<WriteIDiv, [SBPort0, SBDivider], 25, [1, 10]>;
+
+defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteIDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
+defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
+
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue May 8 06:51:45 2018
@@ -107,7 +107,16 @@ def : WriteRes<WriteRMW, [SKLPort237,SKL
// Arithmetic.
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
-defm : SKLWriteResPair<WriteIDiv, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; // Integer division.
+
+defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteIDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteIDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteIDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : SKLWriteResPair<WriteIDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue May 8 06:51:45 2018
@@ -107,7 +107,16 @@ def : WriteRes<WriteRMW, [SKXPort237,SKX
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
-defm : SKXWriteResPair<WriteIDiv, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; // Integer division.
+
+defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteIDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteIDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : SKXWriteResPair<WriteIDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue May 8 06:51:45 2018
@@ -78,9 +78,18 @@ defm WriteALU : X86SchedWritePair; // S
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
-defm WriteIDiv : X86SchedWritePair; // Integer division.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+// Integer division.
+defm WriteDiv8 : X86SchedWritePair;
+defm WriteDiv16 : X86SchedWritePair;
+defm WriteDiv32 : X86SchedWritePair;
+defm WriteDiv64 : X86SchedWritePair;
+defm WriteIDiv8 : X86SchedWritePair;
+defm WriteIDiv16 : X86SchedWritePair;
+defm WriteIDiv32 : X86SchedWritePair;
+defm WriteIDiv64 : X86SchedWritePair;
+
defm WriteBitScan : X86SchedWritePair; // Bit scan forward/reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue May 8 06:51:45 2018
@@ -78,7 +78,16 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIDiv, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+
+defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
+defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
+defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
+defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
+defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
+
defm : AtomWriteResPair<WriteCRC32, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
@@ -120,27 +129,6 @@ def AtomWriteIMul64I : SchedWriteRes<[At
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
IMUL64rmi8, IMUL64rmi32)>;
-def AtomWriteDiv : SchedWriteRes<[AtomPort01]> {
- let Latency = 50;
- let ResourceCycles = [50];
-}
-def : InstRW<[AtomWriteDiv], (instrs DIV8r,
- DIV16r, DIV16m,
- DIV32r, DIV32m)>;
-
-def AtomWriteDiv8Ld : SchedWriteRes<[AtomPort01]> {
- let Latency = 68;
- let ResourceCycles = [68];
-}
-def : InstRW<[AtomWriteDiv8Ld], (instrs DIV8m)>;
-
-def AtomWriteIDiv64 : SchedWriteRes<[AtomPort01]> {
- let Latency = 130;
- let ResourceCycles = [130];
-}
-def : InstRW<[AtomWriteIDiv64], (instrs DIV64r, IDIV64r,
- DIV64m, IDIV64m)>;
-
// Bit counts.
defm : AtomWriteResPair<WriteBitScan, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WritePOPCNT, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue May 8 06:51:45 2018
@@ -155,10 +155,19 @@ def : WriteRes<WriteRMW, [JSAGU]>;
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
-defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
-defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division)
-defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
+defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
+defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
+
+defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
+defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
+defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
+defm : JWriteResIntPair<WriteDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
+defm : JWriteResIntPair<WriteIDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
+defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
+defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
+defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
+
+defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
@@ -196,43 +205,6 @@ def JWriteIMul64Ld : SchedWriteRes<[JLAG
def : InstRW<[JWriteIMul64], (instrs MUL64r, IMUL64r)>;
def : InstRW<[JWriteIMul64Ld], (instrs MUL64m, IMUL64m)>;
-def JWriteIDiv8 : SchedWriteRes<[JALU1, JDiv]> {
- let Latency = 12;
- let ResourceCycles = [1, 12];
-}
-def JWriteIDiv8Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
- let Latency = 15;
- let ResourceCycles = [1, 1, 12];
-}
-def : InstRW<[JWriteIDiv8], (instrs DIV8r, IDIV8r)>;
-def : InstRW<[JWriteIDiv8Ld], (instrs DIV8m, IDIV8m)>;
-
-def JWriteIDiv16 : SchedWriteRes<[JALU1, JDiv]> {
- let Latency = 17;
- let ResourceCycles = [1, 17];
- let NumMicroOps = 2;
-}
-def JWriteIDiv16Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
- let Latency = 20;
- let ResourceCycles = [1, 1, 17];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteIDiv16], (instrs DIV16r, IDIV16r)>;
-def : InstRW<[JWriteIDiv16Ld], (instrs DIV16m, IDIV16m)>;
-
-def JWriteIDiv32 : SchedWriteRes<[JALU1, JDiv]> {
- let Latency = 25;
- let ResourceCycles = [1, 25];
- let NumMicroOps = 2;
-}
-def JWriteIDiv32Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
- let Latency = 28;
- let ResourceCycles = [1, 1, 25];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>;
-def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>;
-
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue May 8 06:51:45 2018
@@ -121,8 +121,14 @@ defm : SLMWriteResPair<WritePOPCNT, [SL
defm : SLMWriteResPair<WriteBEXTR, [SLM_IEC_RSV0], 1>;
defm : SLMWriteResPair<WriteBZHI, [SLM_IEC_RSV0], 1>;
-// This is quite rough, latency depends on the dividend.
-defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue May 8 06:51:45 2018
@@ -172,15 +172,14 @@ defm : ZnWriteResPair<WriteBEXTR, [ZnALU
defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
// IDIV
-def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> {
- let Latency = 41;
- let ResourceCycles = [1, 41];
-}
-
-def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
- let Latency = 45;
- let ResourceCycles = [1, 4, 41];
-}
+defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
+defm : ZnWriteResPair<WriteDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
+defm : ZnWriteResPair<WriteDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
+defm : ZnWriteResPair<WriteDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
+defm : ZnWriteResPair<WriteIDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
+defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
+defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
+defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
// IMULH
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
@@ -552,34 +551,6 @@ def ZnWriteMulX64Ld : SchedWriteRes<[ZnA
}
def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
-// DIV, IDIV.
-// r8.
-def ZnWriteDiv8 : SchedWriteRes<[ZnALU2, ZnDivider]> {
- let Latency = 15;
-}
-def : InstRW<[ZnWriteDiv8], (instregex "DIV8r", "IDIV8r")>;
-
-// r16.
-def ZnWriteDiv16 : SchedWriteRes<[ZnALU2, ZnDivider]> {
- let Latency = 17;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteDiv16], (instregex "DIV16r", "IDIV16r")>;
-
-// r32.
-def ZnWriteDiv32 : SchedWriteRes<[ZnALU2, ZnDivider]> {
- let Latency = 25;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteDiv32], (instregex "DIV32r", "IDIV32r")>;
-
-// r64.
-def ZnWriteDiv64 : SchedWriteRes<[ZnALU2, ZnDivider]> {
- let Latency = 41;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteDiv64], (instregex "DIV64r", "IDIV64r")>;
-
//-- Control transfer instructions --//
// J(E|R)CXZ.
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Tue May 8 06:51:45 2018
@@ -5275,13 +5275,13 @@ define void @test_div(i8 %a0, i16 %a1, i
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: divb %dil # sched: [15:1.00]
-; ZNVER1-NEXT: divb (%r8) # sched: [45:41.00]
-; ZNVER1-NEXT: divw %si # sched: [17:1.00]
-; ZNVER1-NEXT: divw (%r9) # sched: [45:41.00]
-; ZNVER1-NEXT: divl %edx # sched: [25:1.00]
-; ZNVER1-NEXT: divl (%rax) # sched: [45:41.00]
-; ZNVER1-NEXT: divq %rcx # sched: [41:1.00]
+; ZNVER1-NEXT: divb %dil # sched: [15:15.00]
+; ZNVER1-NEXT: divb (%r8) # sched: [19:15.00]
+; ZNVER1-NEXT: divw %si # sched: [17:17.00]
+; ZNVER1-NEXT: divw (%r9) # sched: [21:17.00]
+; ZNVER1-NEXT: divl %edx # sched: [25:25.00]
+; ZNVER1-NEXT: divl (%rax) # sched: [29:25.00]
+; ZNVER1-NEXT: divq %rcx # sched: [41:41.00]
; ZNVER1-NEXT: divq (%r10) # sched: [45:41.00]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
@@ -5523,13 +5523,13 @@ define void @test_idiv(i8 %a0, i16 %a1,
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: idivb %dil # sched: [15:1.00]
-; ZNVER1-NEXT: idivb (%r8) # sched: [45:41.00]
-; ZNVER1-NEXT: idivw %si # sched: [17:1.00]
-; ZNVER1-NEXT: idivw (%r9) # sched: [45:41.00]
-; ZNVER1-NEXT: idivl %edx # sched: [25:1.00]
-; ZNVER1-NEXT: idivl (%rax) # sched: [45:41.00]
-; ZNVER1-NEXT: idivq %rcx # sched: [41:1.00]
+; ZNVER1-NEXT: idivb %dil # sched: [15:15.00]
+; ZNVER1-NEXT: idivb (%r8) # sched: [19:15.00]
+; ZNVER1-NEXT: idivw %si # sched: [17:17.00]
+; ZNVER1-NEXT: idivw (%r9) # sched: [21:17.00]
+; ZNVER1-NEXT: idivl %edx # sched: [25:25.00]
+; ZNVER1-NEXT: idivl (%rax) # sched: [29:25.00]
+; ZNVER1-NEXT: idivq %rcx # sched: [41:41.00]
; ZNVER1-NEXT: idivq (%r10) # sched: [45:41.00]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s?rev=331767&r1=331766&r2=331767&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s Tue May 8 06:51:45 2018
@@ -612,22 +612,22 @@ subq (%rax), %rdi
# CHECK-NEXT: 2 5 0.50 * * decl (%rax)
# CHECK-NEXT: 1 1 0.25 decq %rdi
# CHECK-NEXT: 2 5 0.50 * * decq (%rax)
-# CHECK-NEXT: 1 15 1.00 * divb %dil
-# CHECK-NEXT: 1 45 41.00 * * divb (%rax)
-# CHECK-NEXT: 2 17 1.00 * divw %si
-# CHECK-NEXT: 1 45 41.00 * * divw (%rax)
-# CHECK-NEXT: 2 25 1.00 * divl %edx
-# CHECK-NEXT: 1 45 41.00 * * divl (%rax)
-# CHECK-NEXT: 2 41 1.00 * divq %rcx
-# CHECK-NEXT: 1 45 41.00 * * divq (%rax)
-# CHECK-NEXT: 1 15 1.00 * idivb %dil
-# CHECK-NEXT: 1 45 41.00 * * idivb (%rax)
-# CHECK-NEXT: 2 17 1.00 * idivw %si
-# CHECK-NEXT: 1 45 41.00 * * idivw (%rax)
-# CHECK-NEXT: 2 25 1.00 * idivl %edx
-# CHECK-NEXT: 1 45 41.00 * * idivl (%rax)
-# CHECK-NEXT: 2 41 1.00 * idivq %rcx
-# CHECK-NEXT: 1 45 41.00 * * idivq (%rax)
+# CHECK-NEXT: 1 15 15.00 * divb %dil
+# CHECK-NEXT: 2 19 15.00 * * divb (%rax)
+# CHECK-NEXT: 2 17 17.00 * divw %si
+# CHECK-NEXT: 3 21 17.00 * * divw (%rax)
+# CHECK-NEXT: 2 25 25.00 * divl %edx
+# CHECK-NEXT: 3 29 25.00 * * divl (%rax)
+# CHECK-NEXT: 2 41 41.00 * divq %rcx
+# CHECK-NEXT: 3 45 41.00 * * divq (%rax)
+# CHECK-NEXT: 1 15 15.00 * idivb %dil
+# CHECK-NEXT: 2 19 15.00 * * idivb (%rax)
+# CHECK-NEXT: 2 17 17.00 * idivw %si
+# CHECK-NEXT: 3 21 17.00 * * idivw (%rax)
+# CHECK-NEXT: 2 25 25.00 * idivl %edx
+# CHECK-NEXT: 3 29 25.00 * * idivl (%rax)
+# CHECK-NEXT: 2 41 41.00 * idivq %rcx
+# CHECK-NEXT: 3 45 41.00 * * idivq (%rax)
# CHECK-NEXT: 1 4 1.00 imulb %dil
# CHECK-NEXT: 2 8 1.00 * imulb (%rax)
# CHECK-NEXT: 1 3 1.00 imulw %di
@@ -954,7 +954,7 @@ subq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 119.00 119.00 87.00 121.00 103.00 87.00 336.00 - - - - 34.00
+# CHECK-NEXT: 107.00 107.00 87.00 121.00 103.00 87.00 392.00 - - - - 34.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -1074,22 +1074,22 @@ subq (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - decl (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - decq %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - decq (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divb %dil
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divb (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divw %si
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divw (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divl %edx
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divl (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divq %rcx
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divq (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivb %dil
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivb (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivw %si
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivw (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivl %edx
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivl (%rax)
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivq %rcx
-# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivq (%rax)
+# CHECK-NEXT: - - - - 1.00 - 15.00 - - - - - divb %dil
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 15.00 - - - - - divb (%rax)
+# CHECK-NEXT: - - - - 1.00 - 17.00 - - - - - divw %si
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 17.00 - - - - - divw (%rax)
+# CHECK-NEXT: - - - - 1.00 - 25.00 - - - - - divl %edx
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 25.00 - - - - - divl (%rax)
+# CHECK-NEXT: - - - - 1.00 - 41.00 - - - - - divq %rcx
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 41.00 - - - - - divq (%rax)
+# CHECK-NEXT: - - - - 1.00 - 15.00 - - - - - idivb %dil
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 15.00 - - - - - idivb (%rax)
+# CHECK-NEXT: - - - - 1.00 - 17.00 - - - - - idivw %si
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 17.00 - - - - - idivw (%rax)
+# CHECK-NEXT: - - - - 1.00 - 25.00 - - - - - idivl %edx
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 25.00 - - - - - idivl (%rax)
+# CHECK-NEXT: - - - - 1.00 - 41.00 - - - - - idivq %rcx
+# CHECK-NEXT: 0.50 0.50 - - 1.00 - 41.00 - - - - - idivq (%rax)
# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imulb %dil
# CHECK-NEXT: 0.50 0.50 - 1.00 - - - - - - - 1.00 imulb (%rax)
# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imulw %di
More information about the llvm-commits mailing list